def estimate(inputFile='a7q2-harmonic.wav', window='blackman', M=2101, N=4096, t=-90,
             minSineDur=0.1, nH=50, minf0=100, maxf0=200, f0et=5, harmDevSlope=0.01):
    """
    Analyse a sound with the harmonic model, resynthesise it and report two quality figures.

    Input:
        inputFile (string): wav file including the path
        window (string): analysis window type
        M (integer): analysis window size
        N (integer): FFT size
        t (float): magnitude threshold passed to the harmonic analysis and f0 detection
        minSineDur (float): minimum duration of harmonic tracks passed to the harmonic analysis
        nH (integer): maximum number of harmonics
        minf0 (float): minimum fundamental frequency
        maxf0 (float): maximum fundamental frequency
        f0et (float): error threshold used in the f0 detection
        harmDevSlope (float): harmonic deviation slope passed to the harmonic analysis
    Output:
        diff: sum of the absolute sample differences between the input and the
              resynthesised sound, over their common length
        std: standard deviation of the detected f0 track
    Both values, together with the analysis settings, are also printed.
    """
    Ns = 512  # FFT size used by the synthesis
    H = 128   # hop size shared by analysis and synthesis

    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                               harmDevSlope, minSineDur)
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)
    y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)

    # The two signals may differ in length, so compare only the overlapping part.
    size = min(x.size, y.size)
    diff = np.sum(np.abs(x[:size] - y[:size]))
    std = np.std(f0)

    # Single-argument print() produces the same output on Python 2 and Python 3.
    print("diff:{0} & std:{1}, M={2} N={3} t={4} minSineDur={5} nH={6} min/max={7}/{8} f0et={9} harmDevSlope={10}"
          .format(diff, std, M, N, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope))
    return diff, std
def estimateInharmonicity(inputFile='../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming', M=2048, N=2048,
                          H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH=10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time
                                        interval t1 and t2 (nan when the segment has no frames).
    """
    # 0. Read the audio file
    fs, x = UF.wavread(inputFile)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    w = get_window(window, M)
    harmDevSlope = 0.01
    minSineDur = 0.0
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                               harmDevSlope, minSineDur)
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)

    # 2. Extract the segment in which you need to compute the inharmonicity.
    # np.ceil returns a float; slice bounds must be integers.
    b1 = int(np.ceil(t1 * float(fs) / H))
    b2 = int(np.ceil(t2 * float(fs) / H))
    bhfreq = hfreq[b1:b2]
    bf0 = f0[b1:b2]

    # 3. Compute the mean inharmonicity for the segment
    # Per frame: mean over harmonics r of |f_r - r * f0| / r, with f0 taken from the f0 detector.
    frame_means = []
    for idx, h in enumerate(bhfreq):
        coef = np.arange(1, h.size + 1)
        deviation = np.abs(h - coef * bf0[idx]) / coef
        frame_means.append(np.sum(deviation) / len(deviation))
    return np.sum(frame_means) / len(frame_means)
def estimateInharmonicity(inputFile='piano.wav', t1=0.1, t2=0.5, window='hamming', M=2048, N=2048, H=128,
                          f0et=5.0, t=-90, minf0=130, maxf0=180, nH=10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time
                                        interval t1 and t2.
    Raises:
        ZeroDivisionError: when t1 and t2 map to the same frame index (empty segment).
    """
    # Read the audio file
    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M)
    harmDevSlope = 0.01
    minSinDur = 0.0

    # Use harmonic model to compute the harmonic frequencies and magnitudes
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                               harmDevSlope, minSinDur)

    # Frame indices bounding the segment [l1, l2)
    l1 = int(np.ceil(t1 * fs / H))
    l2 = int(np.ceil(t2 * fs / H))

    # Compute the mean inharmonicity for the segment.
    # The first harmonic track (column 0) is used as the fundamental of each frame.
    Imean = 0
    for i in range(l1, l2):
        frame = hfreq[i]
        I = 0
        for r in range(nH):
            I += abs(frame[r] - (r + 1) * frame[0]) / (r + 1)
        Imean += I / nH
    Imean = Imean / (l2 - l1)
    return Imean
def estimateInharmonicity(inputFile='../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming', M=2048, N=2048,
                          H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH=10):
    """
    Estimate how far the harmonics of a sound deviate from integer multiples of its f0.

    Input:
        inputFile (string): path of the wav file to analyse
        t1 (float): start time of the analysed segment
        t2 (float): end time of the analysed segment
        window (string): analysis window type
        M (integer): analysis window size
        N (integer): FFT size
        H (integer): hop size
        f0et (float): error threshold of the f0 detection
        t (float): magnitude threshold in dB for spectral peak picking
        minf0 (float): lowest admissible fundamental frequency in Hz
        maxf0 (float): highest admissible fundamental frequency in Hz
        nH (integer): number of harmonics handed to the harmonic analysis
    Output:
        meanInharm (float or np.float): average of the per-frame inharmonicity over the
                                        frames between t1 and t2.
    """
    # Load the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # Harmonic tracks (harmDevSlope=0.01, minSineDur=0.0) and the f0 contour
    xhfreq, xhmag, xhphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, 0.01, 0.0)
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)

    # Frame range [firstFrame, lastFrame) covered by the time interval
    firstFrame = int(np.ceil(t1 * float(fs) / H))
    lastFrame = int(np.ceil(t2 * float(fs) / H))
    harmSeg = xhfreq[firstFrame:lastFrame]
    f0Seg = f0[firstFrame:lastFrame]

    def frameInharmonicity(k, harmonics):
        # Mean over harmonics r of |f_r - r * f0| / r for frame k of the segment
        orders = np.arange(1, harmonics.size + 1)
        weighted = np.abs(harmonics - orders * f0Seg[k]) / orders
        return np.sum(weighted) / len(weighted)

    perFrame = np.array([frameInharmonicity(k, row) for k, row in enumerate(harmSeg)])
    return np.sum(perFrame) / len(perFrame)
def estimateInharmonicity(inputFile = '../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming', M=2048, N=2048,
                          H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH = 10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time
                                        interval t1 and t2.
    Progress information (frame count, segment shape, per-frame and total values) is printed.
    """
    # 0. Read the audio file
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    xhfreq, xhmag, xhphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                                  harmDevSlope = 0.01, minSineDur = 0.0)

    # 2. Extract the segment in which you need to compute the inharmonicity.
    # np.ceil / np.floor return floats; slice bounds must be integers.
    start = int(np.ceil(t1 * fs / H))
    end = int(np.floor(t2 * fs / H))
    hFreq = xhfreq[start:end + 1]

    # 3. Compute the mean inharmonicity for the segment
    # Only the frequency deviation matters here; frame[0] is used as the fundamental f0.
    R = len(hFreq)
    print(R)
    print(np.shape(hFreq))
    inharmonicity = []
    for frame in hFreq:
        orders = np.arange(1, len(frame) + 1)
        frameInharm = np.sum(np.abs(frame - orders * frame[0]) / orders) / len(frame)
        inharmonicity.append(frameInharm)
        print("Frame Inharmonicity = " + str(frameInharm))
    inharmonicity = np.sum(inharmonicity) / (end - start + 1)
    print("Total Inharmonicity = " + str(inharmonicity))
    return inharmonicity
def estimateInharmonicity(inputFile = '../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming', M=2048, N=2048,
                          H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH = 10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time
                                        interval t1 and t2.
    """
    # 0. Read the audio file
    fs, x = UF.wavread(inputFile)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    w = get_window(window, M)
    xhfreq, xhmag, xhphase = HM.harmonicModelAnal(
        x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
        harmDevSlope=0.01, minSineDur=0.0)

    # 2. Extract the segment in which you need to compute the inharmonicity.
    (nframes, nharm) = xhmag.shape
    # float() forces true division; with integer x.size and fs the Python 2
    # quotient would truncate (possibly to 0) and break the index computation.
    secs_per_index = float(x.size) / fs / nframes
    start_index = int(np.ceil(t1 / secs_per_index))
    end_index = int(np.floor(t2 / secs_per_index))

    # 3. Compute the mean inharmonicity for the segment (frames start_index..end_index inclusive)
    Imean = 0
    for f in range(start_index, end_index + 1):
        inharm = 0.0
        r = 0.0
        for h in range(1, nharm):
            if xhmag[f, h] != 0:
                # r ends up as the order of the highest harmonic with non-zero magnitude
                r = (h + 1)
                inharm += abs(xhfreq[f, h] - (h + 1) * xhfreq[f, 0]) / (h + 1)
        if r != 0:
            Imean += inharm / r
    Imean /= (end_index - start_index + 1)
    return Imean
def hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf):
    """
    Analysis of a sound using the harmonic plus stochastic model
    x: input sound, fs: sampling rate, w: analysis window,
    N: FFT size, H: hop size, t: threshold in negative dB,
    nH: maximum number of harmonics, minf0: minimum f0 frequency in Hz,
    maxf0: maximum f0 frequency in Hz,
    f0et: error threshold in the f0 detection (ex: 5),
    harmDevSlope: slope of harmonic deviation,
    minSineDur: minimum length of harmonics,
    Ns, stocf: forwarded unchanged to UF.stochasticResidual
               (presumably synthesis FFT size and stochastic factor - confirm against that helper)
    returns: hfreq, hmag, hphase: harmonic frequencies, magnitude and phases; mYst: stochastic residual
    """
    # Harmonic analysis first; its tracks are needed to obtain the residual.
    harmonicTracks = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
    hfreq, hmag, hphase = harmonicTracks
    stochastic = UF.stochasticResidual(x, Ns, H, hfreq, hmag, hphase, fs, stocf)
    return hfreq, hmag, hphase, stochastic
def estimateInharmonicity(inputFile = '../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming', M=2048, N=2048,
                          H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH = 10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time
                                        interval t1 and t2.
    """
    # 0. Read the audio file and obtain an analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    # NOTE(review): harmDevSlope is 0.1 here; confirm this value is intended.
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                               harmDevSlope=0.1, minSineDur=0.0)

    # 2. Extract the time segment in which you need to compute the inharmonicity.
    # np.floor returns a float; slice bounds must be integers.
    l1 = int(np.floor(t1 * fs / H))
    l2 = int(np.floor(t2 * fs / H))
    hfreq1 = hfreq[l1:l2 + 1]
    print(l1, l2)

    # 3. Compute the mean inharmonicity of the segment
    inharm = np.zeros(shape=(hfreq1.shape[0]))
    for l, freq in enumerate(hfreq1):
        R = freq.shape[0]
        f0 = freq[0]  # first harmonic track used as the fundamental of the frame
        total = 0.0
        for r in range(1, R + 1):
            # harmonics with a non-positive frequency are skipped,
            # but the frame sum is still normalised by R
            if freq[r - 1] > 0.0:
                total += np.abs(freq[r - 1] - r * f0) / r
        inharm[l] = total / R
    meanInharm = np.sum(inharm) / (l2 - l1 + 1)
    return meanInharm
def main(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90, minSineDur=0.1,
         nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
    """
    Harmonic-model analysis of a sound followed by its resynthesis, written to a wav file.
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics could have higher allowed deviation
    returns x: input samples; fs: sampling rate; hfreq: harmonic frequencies; y: resynthesised sound
    """
    synthFftSize = 512   # size of fft used in synthesis
    hop = 128            # hop size (has to be 1/4 of the synthesis fft size)

    rate, samples = UF.wavread(inputFile)
    analysisWindow = get_window(window, M)

    # harmonic analysis of the input, then additive resynthesis of the detected harmonics
    harmFreq, harmMag, harmPhase = HM.harmonicModelAnal(samples, rate, analysisWindow, N, hop, t, nH,
                                                        minf0, maxf0, f0et, harmDevSlope, minSineDur)
    resynth = SM.sineModelSynth(harmFreq, harmMag, harmPhase, synthFftSize, hop, rate)

    # output name: input base name without its last four characters, plus a suffix
    stem = os.path.basename(inputFile)[:-4]
    target = 'output_sounds/' + stem + '_harmonicModel.wav'
    UF.wavwrite(resynth, rate, target)
    return samples, rate, harmFreq, resynth
def hprModelAnal(x, fs, w, N, H, t, minSineDur, nH, minf0, maxf0, f0et, harmDevSlope, Ns=512):
    """Analysis of a sound using the harmonic plus residual model
    x: input sound, fs: sampling rate, w: analysis window; N: FFT size, H: hop size,
    t: threshold in negative dB, minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    Ns: FFT size handed to UF.sineSubtraction (defaults to 512, the value that used to be hard-coded)
    returns hfreq, hmag, hphase: harmonic frequencies, magnitude and phases; xr: residual signal
    """
    # perform harmonic analysis
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                               harmDevSlope, minSineDur)
    # subtract sinusoids from original sound
    xr = UF.sineSubtraction(x, Ns, H, hfreq, hmag, hphase, fs)
    return hfreq, hmag, hphase, xr
def estimateInharmonicity(inputFile='../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming', M=2048, N=2048,
                          H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH=10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): value returned by compute_inharmonicity for the harmonic
                                        tracks and the frame range derived from t1 and t2.
    """
    # 0. Read the audio file and obtain an analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes.
    # A separate f0 detection pass used to run here, but its result was never used.
    # NOTE(review): harmDevSlope and minSineDur are left at harmonicModelAnal's defaults - confirm.
    xhreq, xhmag, xhphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et)

    # 2. Extract the time segment in which you need to compute the inharmonicity.
    starting = int(np.ceil(fs * t1 / H))
    ending = int(np.floor(fs * t2 / H))

    # 3. Compute the mean inharmonicity of the segment (helper defined elsewhere in the file)
    mean_inharmonicity = compute_inharmonicity(xhreq, starting, ending, nH)
    return mean_inharmonicity
def estimateInharmonicity(inputFile = '../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming', M=2048, N=2048,
                          H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH = 10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time
                                        interval t1 and t2 (nan when the segment has no frames).
    """
    # 0. Read the audio file
    fs, x = UF.wavread(inputFile)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    w = get_window(window, M)
    harmDevSlope = 0.01
    minSineDur = 0.0
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                               harmDevSlope, minSineDur)
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)

    # 2. Extract the segment in which you need to compute the inharmonicity.
    # Frame indices must be integers to be usable as slice bounds.
    b1 = int(np.ceil(t1 * float(fs) / H))
    b2 = int(np.ceil(t2 * float(fs) / H))
    bhfreq = hfreq[b1:b2]
    bf0 = f0[b1:b2]

    # 3. Compute the mean inharmonicity for the segment
    # Collect the per-frame values in a list instead of re-allocating an array per frame.
    inhm = []
    for idx, h in enumerate(bhfreq):
        coef = np.arange(1, h.size + 1)
        i = np.abs(h - coef * bf0[idx]) / coef
        inhm.append(np.sum(i) / len(i))
    return np.sum(inhm) / len(inhm)
def estimateInharmonicity(
    inputFile="../../sounds/piano.wav",
    t1=0.1,
    t2=0.5,
    window="hamming",
    M=2048,
    N=2048,
    H=128,
    f0et=5.0,
    t=-90,
    minf0=130,
    maxf0=180,
    nH=10,
):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean of the per-frame values returned by the
                                        inharmonicity helper for the frames between t1 and t2.
    """
    # 0. Read the audio file and obtain an analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    xhfreq, _, _ = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                        harmDevSlope=0.01, minSineDur=0.0)

    # 2. Extract the time segment in which you need to compute the inharmonicity.
    # np.ceil returns a float; slice bounds must be integers.
    first = int(np.ceil(fs * t1 / H))
    last = int(np.ceil(fs * t2 / H))
    xhfreq = xhfreq[first:last, :]

    # 3. Compute the mean inharmonicity of the segment.
    # A list is built explicitly because np.mean cannot consume a lazy map object.
    inh = [inharmonicity(frame) for frame in xhfreq]
    return np.mean(inh)
def estimateInharmonicity(inputFile = '../sms-tools/sounds/piano.wav', t1=0.1, t2=0.5, window='hamming', M=2048,
                          N=2048, H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH = 10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of harmonics considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time
                                        interval t1 and t2 (0.0 when the segment has no frames).
    """
    # 0. Read the audio file and obtain an analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    # NOTE(review): harmDevSlope and minSineDur are left at harmonicModelAnal's defaults - confirm.
    xhfreq, xhmag, xhphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et)

    # 2. Extract the time segment in which you need to compute the inharmonicity.
    # 0.5 * H keeps the half-hop offset a float on Python 2 as well (1 / 2 is 0 there).
    l1 = int(round((t1 * fs + 0.5 * H) / H, 0))  # estimated frame at t1
    l2 = int(round((t2 * fs + 0.5 * H) / H, 0))  # estimated frame at t2
    xhfreq = xhfreq[l1:l2]

    # 3. Compute the mean inharmonicity of the segment
    numFrames = xhfreq.shape[0]
    if numFrames == 0:
        return 0.0
    r = np.arange(1, nH + 1)
    # Per frame: (1 / nH) * sum over r of |f_r - r * f_1| / r, with column 0 as the fundamental.
    deviation = np.abs(xhfreq - r * xhfreq[:, :1]) / r
    I = np.sum(deviation, axis=1) / float(nH)
    # Normalise by the number of frames actually averaged.
    meanInharm = np.sum(I) / float(numFrames)
    return meanInharm
def hpsModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur, Ns, stocf):
    """
    Harmonic plus stochastic analysis of a sound
    x: input sound, fs: sampling rate, w: analysis window; N: FFT size, H: hop size,
    t: threshold in negative dB, nH: maximum number of harmonics,
    minf0: minimum f0 frequency in Hz, maxf0: maximum f0 frequency in Hz;
    f0et: error threshold in the f0 detection (ex: 5),
    harmDevSlope: slope of harmonic deviation; minSineDur: minimum length of harmonics
    Ns: FFT size given to the sinusoid subtraction,
    stocf: last argument of the stochastic analysis of the residual
    returns hfreq, hmag, hphase: harmonic frequencies, magnitude and phases; stocEnv: stochastic residual
    """
    # step 1: harmonic tracks of the input
    harmonicTracks = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
    hfreq, hmag, hphase = harmonicTracks
    # step 2: residual left after removing those harmonics from the input
    residual = UF.sineSubtraction(x, Ns, H, hfreq, hmag, hphase, fs)
    # step 3: stochastic model of the residual, using an FFT size of twice the hop size
    envelope = STM.stochasticModelAnal(residual, H, H*2, stocf)
    return hfreq, hmag, hphase, envelope
def estimateInharmonicity(inputFile = '../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming', M=2048, N=2048,
                          H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH = 10):
    """
    Estimate the inharmonicity of a sound over a time interval.

    Input:
        inputFile (string): path of the wav file to analyse
        t1 (float): start time of the analysed segment
        t2 (float): end time of the analysed segment
        window (string): analysis window type
        M (integer): analysis window size
        N (integer): FFT size
        H (integer): hop size
        f0et (float): error threshold of the f0 detection
        t (float): magnitude threshold in dB for spectral peak picking
        minf0 (float): lowest admissible fundamental frequency in Hz
        maxf0 (float): highest admissible fundamental frequency in Hz
        nH (integer): number of harmonics handed to the harmonic analysis
    Output:
        meanInharm (float or np.float): sum of the per-frame results of the inharmonicity helper
                                        divided by the number of frames spanned by t1 and t2.
    """
    # Load the sound and build the analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # Only the harmonic frequencies are needed; magnitudes and phases are discarded
    harmFreqs, _mags, _phases = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                                     harmDevSlope = 0.01, minSineDur = 0.0)

    # Inclusive frame range [firstFrame, lastFrame] covered by the time interval
    firstFrame = int(np.ceil(t1 * fs / H))
    lastFrame = int(np.floor(t2 * fs / H))
    frameCount = lastFrame - firstFrame + 1

    # Apply the per-frame helper (defined elsewhere in the file) to every row of the segment
    perFrame = np.apply_along_axis(inharmonicity, 1, harmFreqs[firstFrame:lastFrame + 1])
    return np.sum(perFrame) / frameCount
def NoteHarmonic(FileName):
    """
    Run the harmonic model analysis on a sound file with fixed settings.

    FileName: input sound file, read with UF.wavread
    returns hfreq: the harmonic frequencies returned by HM.harmonicModelAnal; with nH=1
            at most one harmonic is requested
    """
    (fs, x) = UF.wavread(FileName)

    # fixed analysis settings
    window = 'blackman'    # analysis window type
    M = 1201               # analysis window size
    N = 2048               # fft size
    t = -90                # magnitude threshold of spectral peaks
    minSineDur = 0.1       # minimum duration of sinusoidal tracks
    nH = 1                 # maximum number of harmonics
    minf0 = 75             # minimum fundamental frequency in sound
    maxf0 = 500            # maximum fundamental frequency in sound
    f0et = 20              # maximum error accepted in f0 detection algorithm
    harmDevSlope = 0.01    # allowed deviation of harmonic tracks
    H = 128                # hop size

    # compute analysis window
    w = get_window(window, M)

    # detect harmonics of input sound
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                               harmDevSlope, minSineDur)
    return hfreq
def main(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90,
         minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
    """
    Analyse a sound with the harmonic model, resynthesise it and save the result.
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size; N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks; minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics; minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound; f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics could have higher allowed deviation
    returns the input samples, the sampling rate, the harmonic frequencies and the synthesised sound
    """
    # synthesis fft size and hop size (the hop has to be 1/4 of the fft size)
    Ns, H = 512, 128

    # the output file lives in output_sounds/ and reuses the input base name minus its last 4 characters
    baseName = os.path.basename(inputFile)
    outputFile = 'output_sounds/' + baseName[:-4] + '_harmonicModel.wav'

    # read the sound and analyse its harmonics
    fs, x = UF.wavread(inputFile)
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, get_window(window, M), N, H, t, nH,
                                               minf0, maxf0, f0et, harmDevSlope, minSineDur)

    # synthesise the harmonics and write them out
    y = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)
    UF.wavwrite(y, fs, outputFile)

    return x, fs, hfreq, y
def analysis(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90,
             minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
    """
    Analyze a sound with the harmonic model
    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation
    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies and magnitudes
    Side effects: writes the resynthesised sound to output_sounds/ and opens a
    non-blocking figure with the input, the harmonic tracks and the output.
    """
    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    fs, x = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the harmonic model of the whole sound
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                               harmDevSlope, minSineDur)

    # synthesize the sines without original phases
    y = SM.sineModelSynth(hfreq, hmag, np.array([]), Ns, H, fs)

    # output sound file (monophonic with sampling rate of 44100)
    outputFile = 'output_sounds/' + os.path.basename(inputFile)[:-4] + '_harmonicModel.wav'

    # write the sound resulting from the harmonic synthesis
    UF.wavwrite(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    # np.min / np.max reduce the array natively; the builtins iterate sample by sample
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), np.min(x), np.max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the harmonic frequencies, if any track was returned
    if hfreq.shape[1] > 0:
        plt.subplot(3, 1, 2)
        tracks = np.copy(hfreq)
        numFrames = tracks.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        # non-positive frequencies are replaced by nan so they are not drawn
        tracks[tracks <= 0] = np.nan
        plt.plot(frmTime, tracks)
        plt.axis([0, x.size / float(fs), 0, maxplotfreq])
        plt.title('frequencies of harmonic tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), np.min(y), np.max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, hfreq, hmag
def estimateInharmonicity(inputFile = '../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming',
                          M=2048, N=2048, H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH = 10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of integers considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time interval
                                        t1 and t2.
    """
    # 0. Read the audio file
    fs, x = UF.wavread(inputFile)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    harmDevSlope = 0.01
    minSineDur = 0.0
    w = get_window(window, M)
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0,
                                               f0et, harmDevSlope, minSineDur)

    # 2. Extract the segment in which you need to compute the inharmonicity.
    # Times are mapped to frame indices proportionally to the sound duration.
    # np.ceil/np.floor return floats, and slice bounds must be integers.
    total_time = float(len(x)) / fs
    num_frames = len(hfreq)
    bin1 = int(np.ceil((t1 / total_time) * num_frames))
    bin2 = int(np.floor((t2 / total_time) * num_frames))
    harm_seg = hfreq[bin1:bin2 + 1]

    # 3. Compute the mean inharmonicity for the segment
    R = harm_seg.shape[1]
    orders = np.arange(1, R + 1)
    # deviation of every harmonic from its ideal multiple of the first harmonic,
    # normalised by the harmonic number
    deviation = np.abs(harm_seg - orders * harm_seg[:, :1]) / orders
    # harmonics with a non-positive frequency do not contribute
    deviation[harm_seg <= 0.0] = 0.0
    Inharm = deviation.sum(axis=1) / R

    Inmean = (1.0 / (bin2 - bin1 + 1.0)) * np.sum(Inharm)
    return Inmean
import numpy as np
import matplotlib.pyplot as plt
from scipy.signal import hamming, hanning, triang, blackmanharris, resample
import math
import sys
import os
import time

# make the model modules in ../software/models/ (relative to this file) importable
sys.path.append(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../software/models/'))
import stft as STFT
import utilFunctions as UF
import harmonicModel as HM

# read the input sound
(fs, x) = UF.wavread(os.path.join(os.path.dirname(os.path.realpath(__file__)), '../sounds/cello-double.wav'))

# analysis settings passed to the harmonic model
w = np.blackman(3501)
N = 2048*2
t = -100
nH = 100
minf0 = 140
maxf0 = 150
f0et = 10
minSineDur = .2
harmDevSlope = 0.001

# synthesis fft size and hop size (a quarter of Ns);
# floor division keeps H an integer under Python 3 true division as well
Ns = 512
H = Ns // 4

# harmonic analysis, resynthesis, and subtraction of the harmonics from the input
hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)
y = HM.harmonicModelSynth(hfreq, hmag, hphase, Ns, H, fs)
xr = UF.sineSubtraction(x, Ns, H, hfreq, hmag, hphase, fs)
def estimateInharmonicity(inputFile='../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming',
                          M=2048, N=2048, H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH=10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of integers considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time interval
                                        t1 and t2.

    The function produces no console output. Within a frame the deviation is
    averaged over the harmonics that were actually found; a frame in which no
    harmonic was found contributes 0 instead of turning the mean into NaN.
    """
    # 0. Read the audio file and obtain an analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    harmDevSlope = 0.01
    minSineDur = 0.0
    xhfreq, xhmag, xhphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0,
                                                  f0et, harmDevSlope, minSineDur)

    # 2. Extract the time segment in which you need to compute the inharmonicity.
    segmentStart = int(np.ceil(t1 * fs / H))
    segmentEnd = int(np.ceil(t2 * fs / H)) + 1
    xhFreqSegment = xhfreq[segmentStart:segmentEnd]

    # 3. Compute the mean inharmonicity of the segment
    harmArr = np.arange(1, nH + 1)
    # a frequency below machine epsilon is treated as "harmonic not found";
    # NOTE(review): threshold is defined locally instead of relying on a
    # module-level `eps` that is not visible from this function - confirm
    found = xhFreqSegment >= np.finfo(float).eps
    # ideal harmonic positions: integer multiples of the first harmonic of each frame
    frArr = harmArr * xhFreqSegment[:, :1]
    inhArr = np.where(found, np.abs(xhFreqSegment - frArr) / harmArr, 0.0)

    foundCount = found.sum(axis=1)
    inharmonicityArr = np.zeros(xhFreqSegment.shape[0])
    hasHarmonics = foundCount > 0
    # divide only where at least one harmonic exists, avoiding 0/0
    inharmonicityArr[hasHarmonics] = inhArr.sum(axis=1)[hasHarmonics] / foundCount[hasHarmonics]

    result = np.sum(inharmonicityArr) / len(inharmonicityArr)
    return result
def analysis(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90,
             minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
    """
    Analyze a sound with the harmonic model

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation

    The resynthesized sound is written to 'output_sounds/<input name>_harmonicModel.wav'
    and a three-panel figure (input, harmonic tracks, output) is shown without blocking.

    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies and magnitudes
    """
    Ns = 512  # size of fft used in synthesis
    H = 128   # hop size (has to be 1/4 of Ns)

    # read input sound
    fs, x = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the harmonic model of the whole sound
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0,
                                               f0et, harmDevSlope, minSineDur)

    # synthesize the sines without original phases
    y = SM.sineModelSynth(hfreq, hmag, np.array([]), Ns, H, fs)

    # output sound file name: the input name without its extension
    # (splitext is used so the extension length does not matter)
    stem, _ = os.path.splitext(os.path.basename(inputFile))
    outputFile = 'output_sounds/' + stem + '_harmonicModel.wav'

    # write the synthesized sound
    UF.wavwrite(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    inputDuration = x.size / float(fs)
    outputDuration = y.size / float(fs)

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, inputDuration, np.min(x), np.max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the harmonic frequencies when there is at least one harmonic column
    if hfreq.shape[1] > 0:
        plt.subplot(3, 1, 2)
        tracks = np.copy(hfreq)  # copy: the returned hfreq must stay untouched
        numFrames = tracks.shape[0]
        frmTime = H * np.arange(numFrames) / float(fs)
        tracks[tracks <= 0] = np.nan  # NaN entries are not drawn
        plt.plot(frmTime, tracks)
        plt.axis([0, inputDuration, 0, maxplotfreq])
        plt.title('frequencies of harmonic tracks')

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, outputDuration, np.min(y), np.max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, hfreq, hmag
def analysis(inputFile='../../sounds/vignesh.wav', window='blackman', M=1201, N=2048, t=-90,
             minSineDur=0.1, nH=100, minf0=130, maxf0=300, f0et=7, harmDevSlope=0.01):
    """
    Analyze a sound with the harmonic model and plot the tracks over its spectrogram.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation

    Side effects: writes 'output_sounds/<input name>_harmonicModel.wav' and shows a
    non-blocking matplotlib figure.

    returns inputFile: input file name; fs: sampling rate of input file,
            hfreq, hmag: harmonic frequencies and magnitudes
    """
    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # compute the magnitude and phase spectrogram of input sound
    mX, pX = STFT.stftAnal(x, fs, w, N, H)

    # compute the harmonic model of the whole sound
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0,
                                               f0et, harmDevSlope, minSineDur)

    # synthesize the sines without original phases
    y = SM.sineModelSynth(hfreq, hmag, np.array([]), Ns, H, fs)

    # output sound file; splitext strips the extension regardless of its length
    baseName = os.path.splitext(os.path.basename(inputFile))[0]
    outputFile = 'output_sounds/' + baseName + '_harmonicModel.wav'

    # write the synthesized sound
    UF.wavwrite(y, fs, outputFile)

    # --------- plotting --------------------

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), x.min(), x.max()])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the magnitude spectrogram up to maxplotfreq
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    numFrames = int(mX[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(maxplotbin + 1) * float(fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mX[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot the harmonic frequencies on top of the spectrogram;
    # values at or above maxplotfreq are zeroed, then all non-positive values
    # become NaN so they are not drawn
    tracks = hfreq * np.less(hfreq, maxplotfreq)
    tracks[tracks <= 0] = np.nan
    plt.plot(frmTime, tracks, color='k')
    plt.title('magnitude spectrogram + harmonic tracks')
    plt.autoscale(tight=True)

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), y.min(), y.max()])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show(block=False)

    return inputFile, fs, hfreq, hmag
import harmonicModel as HM

# input sound and analysis window
inputFile = '../../sounds/vignesh.wav'
window = 'blackman'
M, N = 1201, 2048

# peak threshold and minimum track duration
t = -90.0
minSineDur = 0.1

# harmonic tracking settings
nH = 50
minf0, maxf0 = 130, 300
f0et = 5
harmDevSlope = 0.1
#harmDevSlope = 0.001 # restricts deviation in higher frequencies

# synthesis fft size and hop size (1/4 of Ns)
Ns = 512
H = 128

# load the sound, build the window and run the harmonic analysis
fs, x = UF.wavread(inputFile)
w = get_window(window, M)
hfreq, hmag, hphase = HM.harmonicModelAnal(
    x, fs, w, N, H, t, nH, minf0, maxf0, f0et, harmDevSlope, minSineDur)

# time of every analysis frame, in seconds
numFrames = int(hfreq[:, 0].size)
frmTime = np.arange(numFrames) * H / float(fs)

# replace non-positive frequencies by NaN (in place) so they are not drawn
hfreq[np.less_equal(hfreq, 0)] = np.nan

plt.plot(frmTime, hfreq)
plt.show()
def main(
    inputFile="../../sounds/sax-phrase.wav",
    window="blackman",
    M=601,
    N=1024,
    t=-100,
    minSineDur=0.1,
    nH=100,
    minf0=350,
    maxf0=700,
    f0et=5,
    harmDevSlope=0.01,
):
    """
    Split a sound into harmonics plus residual, save the three sounds and plot them.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    nH: maximum number of harmonics
    minf0: minimum fundamental frequency in sound
    maxf0: maximum fundamental frequency in sound
    f0et: maximum error accepted in f0 detection algorithm
    harmDevSlope: allowed deviation of harmonic tracks, higher harmonics have higher allowed deviation

    Side effects: writes '<name>_hprModel_sines.wav', '<name>_hprModel_residual.wav' and
    '<name>_hprModel.wav' into 'output_sounds/' and shows a (blocking) matplotlib figure.
    Returns nothing.
    """
    # size of fft used in synthesis
    Ns = 512
    # hop size (has to be 1/4 of Ns)
    H = 128

    # --------- computation -----------------

    # read input sound
    (fs, x) = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # find harmonics
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0,
                                               f0et, harmDevSlope, minSineDur)

    # subtract harmonics from original sound
    xr = UF.sineSubtraction(x, Ns, H, hfreq, hmag, hphase, fs)

    # compute spectrogram of residual
    mXr, pXr = STFT.stftAnal(xr, fs, w, N, H)

    # synthesize harmonic component
    yh = SM.sineModelSynth(hfreq, hmag, hphase, Ns, H, fs)

    # sum harmonics and residual over their common length
    commonSize = min(xr.size, yh.size)
    y = xr[:commonSize] + yh[:commonSize]

    # output sound files; splitext strips the extension regardless of its length
    outputStem = "output_sounds/" + os.path.splitext(os.path.basename(inputFile))[0]
    outputFileSines = outputStem + "_hprModel_sines.wav"
    outputFileResidual = outputStem + "_hprModel_residual.wav"
    outputFile = outputStem + "_hprModel.wav"

    # write sounds files for harmonics, residual, and the sum
    UF.wavwrite(yh, fs, outputFileSines)
    UF.wavwrite(xr, fs, outputFileResidual)
    UF.wavwrite(y, fs, outputFile)

    # --------- plotting --------------------

    # create figure to plot
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    plt.axis([0, x.size / float(fs), x.min(), x.max()])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("input sound: x")

    # plot the magnitude spectrogram of residual
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    numFrames = int(mXr[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(maxplotbin + 1) * float(fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, : maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot harmonic frequencies on residual spectrogram;
    # values at or above maxplotfreq are zeroed and zeros become NaN (not drawn)
    harms = hfreq * np.less(hfreq, maxplotfreq)
    harms[harms == 0] = np.nan
    numFrames = int(harms[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    plt.plot(frmTime, harms, color="k", ms=3, alpha=1)
    plt.xlabel("time(s)")
    plt.ylabel("frequency(Hz)")
    plt.autoscale(tight=True)
    plt.title("harmonics + residual spectrogram")

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), y.min(), y.max()])
    plt.ylabel("amplitude")
    plt.xlabel("time (sec)")
    plt.title("output sound: y")

    plt.tight_layout()
    plt.show()
def estimateInharmonicity(inputFile='../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming',
                          M=2048, N=2048, H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH=10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of integers considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time interval
                                        t1 and t2.

    Harmonics with a non-positive frequency are left out of the per-frame sum, so a
    frame without any detected harmonic simply contributes 0.
    """
    # 0. Read the audio file and obtain an analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                               harmDevSlope=0.01, minSineDur=0.0)

    # 2. Extract the time segment in which you need to compute the inharmonicity.
    startFrame = int(np.ceil(t1 * fs / H))
    endFrame = int(np.floor(t2 * fs / H))
    fSeg = hfreq[startFrame:endFrame]

    # 3. Compute the mean inharmonicity of the segment
    orders = np.arange(1, fSeg.shape[1] + 1)
    # deviation of each harmonic from its ideal multiple of the first harmonic,
    # normalised by the harmonic number; the first column deviates by 0 from itself
    deviation = np.abs(fSeg - orders * fSeg[:, :1]) / orders
    # masking by value (instead of deleting an index) also works for frames in
    # which no harmonic is positive
    deviation[fSeg <= 0.0] = 0.0
    I = deviation.sum(axis=1) / nH

    Imean = 1.0 / (endFrame - startFrame) * np.sum(I)
    return Imean
def estimateInharmonicity(inputFile='../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming',
                          M=2048, N=2048, H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH=10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of integers considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time interval
                                        t1 and t2.
    """
    # 0. Read the audio file and obtain an analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    #    (harmDevSlope = 0.01, minSineDur = 0.0)
    xhfreq, xhmag, xhphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et, 0.01, 0.0)

    # 2. Extract the time segment: times are mapped to frame indices
    #    proportionally to the duration of the sound.
    duration = x.size / float(fs)
    firstFrame = int(np.ceil((t1 / duration) * len(xhfreq)))
    stopFrame = int(np.ceil((t2 / duration) * len(xhfreq)))
    frames = xhfreq[firstFrame:stopFrame]

    # 3. Compute the mean inharmonicity of the segment
    accumulated = 0.0
    for harmonics in frames:
        # per frame: average over all harmonics of the deviation from the ideal
        # multiple of the first harmonic, divided by the harmonic number
        frameTotal = 0.0
        for order, freq in enumerate(harmonics, 1):
            frameTotal += np.abs(freq - (order * harmonics[0])) / order
        accumulated += frameTotal / harmonics.size

    return accumulated / len(frames)
def estimateInharmonicity(inputFile='../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming',
                          M=2048, N=2048, H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH=10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of integers considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time interval
                                        t1 and t2.
    """
    # 0. Read the audio file and obtain an analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    xhfreq, xhmag, xhphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                                  harmDevSlope=0.01, minSineDur=0.0)

    # 2. Frame range of the segment: first frame rounded up, last frame
    #    truncated by the int conversion.
    frameStart = int(np.ceil(t1 * fs / float(H)))
    frameEnd = int(t2 * fs / float(H))
    frameCount = frameEnd - frameStart + 1

    # 3. Compute the mean inharmonicity of the segment
    meanInharm = 0.0
    for row in range(frameStart, frameEnd + 1):
        # harmonics 2..nH: deviation from the ideal multiple of the first
        # harmonic, divided by the harmonic number
        frameTotal = 0.0
        for order in range(2, nH + 1):
            frameTotal += abs(xhfreq[row, order - 1] - order * xhfreq[row, 0]) / order
        # the per-frame total is divided by the number of harmonics
        meanInharm += frameTotal / nH

    return meanInharm / frameCount
def estimateInharmonicity(inputFile='../../sounds/piano.wav', t1=0.1, t2=0.5, window='hamming',
                          M=2048, N=2048, H=128, f0et=5.0, t=-90, minf0=130, maxf0=180, nH=10):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of integers considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time interval
                                        t1 and t2. 0.0 when the interval contains no frame.

    The deviation of harmonics 2 and above is measured against the detected f0
    contour (HM.f0Detection), and the mean is taken over exactly the frames that
    were summed.
    """
    # 0. Read the audio file and obtain an analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    xhfreq, xhmag, xhphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                                  harmDevSlope=0.01, minSineDur=0.0)

    # 2. Extract the time segment in which you need to compute the inharmonicity.
    interval_start = int(math.ceil(t1 * fs / float(H)))
    interval_end = int(math.ceil(t2 * fs / float(H)))

    # 3. Compute the mean inharmonicity of the segment
    f0 = HM.f0Detection(x, fs, w, N, H, t, minf0, maxf0, f0et)
    f0_slice = np.asarray(f0)[interval_start:interval_end]
    sliced = xhfreq[interval_start:interval_end]

    num_frames = sliced.shape[0]
    if num_frames == 0:
        # nothing to average
        return 0.0

    # harmonic numbers 2..number of columns; the first column is not compared
    orders = np.arange(2, sliced.shape[1] + 1)
    deviation = np.abs(sliced[:, 1:] - orders * f0_slice[:, np.newaxis]) / orders
    inharmon = deviation.sum(axis=1) / float(nH)

    # divide by the number of frames actually summed
    mean_inharmon = inharmon.sum() / num_frames
    return mean_inharmon
def estimateInharmonicity(inputFile='../../sounds/piano.wav', t1=2.3, t2=2.55, window='hamming',
                          M=2047, N=2048, H=128, f0et=5.0, t=-90, minf0=230, maxf0=290, nH=15):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of integers considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time interval
                                        t1 and t2.

    The function produces no console output. Within a frame, harmonics whose
    frequency is exactly 0 are skipped and the sum is divided by the number of
    harmonics that were counted (the first harmonic is always counted).
    """
    # 0. Read the audio file and obtain an analysis window
    fs, x = UF.wavread(inputFile)
    w = get_window(window, M)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    xhfreq, xhmag, xhphas = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                                 harmDevSlope=0.01, minSineDur=0.0)

    # 2. Extract the time segment in which you need to compute the inharmonicity.
    start = int(np.ceil(t1 * fs / float(H)))
    end = int(np.floor(t2 * fs / float(H)))

    # 3. Compute the mean inharmonicity of the segment (frames start..end inclusive)
    frameInharm = []
    for val in range(start, end + 1):
        frame = xhfreq[val]
        f0_freq = frame[0]
        total = 0
        count = 1  # the first harmonic itself
        for order, freq in enumerate(frame[1:], 2):
            if freq != 0.0:
                count += 1
                total += abs(freq - order * f0_freq) / float(order)
        frameInharm.append(total / float(count))

    Imean = np.sum(frameInharm) / float(end - start + 1)
    return Imean
def estimateInharmonicity(inputFile='../../sounds/piano.wav', t1=2.55, t2=2.8, window='hamming',
                          M=2047, N=2048, H=128, f0et=5.0, t=-90, minf0=230, maxf0=290, nH=5):
    """
    Function to estimate the extent of inharmonicity present in a sound

    Input:
        inputFile (string): wav file including the path
        t1 (float): start time of the segment considered for computing inharmonicity
        t2 (float): end time of the segment considered for computing inharmonicity
        window (string): analysis window
        M (integer): window size used for computing f0 contour
        N (integer): FFT size used for computing f0 contour
        H (integer): Hop size used for computing f0 contour
        f0et (float): error threshold used for the f0 computation
        t (float): magnitude threshold in dB used in spectral peak picking
        minf0 (float): minimum fundamental frequency in Hz
        maxf0 (float): maximum fundamental frequency in Hz
        nH (integer): number of integers considered for computing inharmonicity
    Output:
        meanInharm (float or np.float): mean inharmonicity over all the frames between the time interval
                                        t1 and t2.
    """
    # 0. Read the audio file and obtain an analysis window
    (fs, x) = UF.wavread(inputFile)
    w = get_window(window, M)

    # 1. Use harmonic model to compute the harmonic frequencies and magnitudes
    hfreq, hmag, hphase = HM.harmonicModelAnal(x, fs, w, N, H, t, nH, minf0, maxf0, f0et,
                                               harmDevSlope=0.01, minSineDur=0.0)

    # 2. Extract the time segment in which you need to compute the inharmonicity.
    # Frame l starts l * H samples into the sound, so the hop size H converts
    # times to frame indices directly.
    l1 = int(np.ceil(t1 * fs / float(H)))
    l2 = int(np.floor(t2 * fs / float(H)))
    segment = hfreq[l1:l2 + 1]

    # 3. Compute the mean inharmonicity of the segment
    orders = np.arange(1, nH + 1)
    # deviation of every harmonic from its ideal multiple of the first harmonic,
    # normalised by the harmonic number and averaged over the nH harmonics
    deviation = np.abs(segment - orders * segment[:, :1]) / orders
    I = deviation.sum(axis=1) / nH

    Imean = np.sum(I) / (l2 - l1 + 1)
    return Imean