Exemplo n.º 1
0
def main(inputFile='../../sounds/bendir.wav',
         window='hamming',
         M=2001,
         N=2048,
         t=-80,
         minSineDur=0.02,
         maxnSines=150,
         freqDevOffset=10,
         freqDevSlope=0.001):
    """
    Analyze a sound with the sinusoidal plus residual model and resynthesize it.

    The sinusoidal component, the residual and their sum are written as three
    separate sound files under 'output_sounds/', named after the input file.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns: tuple (x, fs, mXr, tfreq, y) holding the input sound, its sampling
             rate, the first output of STFT.stftAnal on the residual, the
             sinusoidal track frequencies and the synthesized output sound
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    fs, x = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # perform sinusoidal plus residual analysis
    tfreq, tmag, tphase, xr = SPR.sprModelAnal(
        x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope)

    # compute spectrogram of residual (the second output is not needed here)
    mXr, _ = STFT.stftAnal(xr, fs, w, N, H)

    # sum sinusoids and residual
    y, ys = SPR.sprModelSynth(tfreq, tmag, tphase, xr, Ns, H, fs)

    # Strip the real extension rather than a fixed four characters, so that
    # inputs such as 'x.aiff' or 'x.flac' still produce sensible output names.
    stem = os.path.splitext(os.path.basename(inputFile))[0]
    outputPrefix = 'output_sounds/' + stem + '_sprModel'

    # write sound files for sinusoidal, residual, and the sum
    UF.wavwrite(ys, fs, outputPrefix + '_sines.wav')
    UF.wavwrite(xr, fs, outputPrefix + '_residual.wav')
    UF.wavwrite(y, fs, outputPrefix + '.wav')
    return x, fs, mXr, tfreq, y
Exemplo n.º 2
0
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80,
         minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Run the sinusoidal plus residual model on a sound and save the results.

    Three sound files (sinusoidal part, residual, and their sum) are written
    under 'output_sounds/', each named after the input file.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    returns: tuple (x, fs, mXr, tfreq, y) holding the input sound, its sampling
             rate, the first output of STFT.stftAnal on the residual, the
             sinusoidal track frequencies and the synthesized output sound
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    fs, x = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # perform sinusoidal plus residual analysis
    tfreq, tmag, tphase, xr = SPR.sprModelAnal(
        x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope)

    # compute spectrogram of residual (only the first output is used)
    mXr, _ = STFT.stftAnal(xr, fs, w, N, H)

    # sum sinusoids and residual
    y, ys = SPR.sprModelSynth(tfreq, tmag, tphase, xr, Ns, H, fs)

    # Derive the output names from the file name without its extension;
    # splitext copes with extensions that are not exactly three letters long.
    baseName = os.path.splitext(os.path.basename(inputFile))[0]
    outputFileSines = 'output_sounds/' + baseName + '_sprModel_sines.wav'
    outputFileResidual = 'output_sounds/' + baseName + '_sprModel_residual.wav'
    outputFile = 'output_sounds/' + baseName + '_sprModel.wav'

    # write sound files for sinusoidal, residual, and the sum
    UF.wavwrite(ys, fs, outputFileSines)
    UF.wavwrite(xr, fs, outputFileResidual)
    UF.wavwrite(y, fs, outputFile)
    return x, fs, mXr, tfreq, y
Exemplo n.º 3
0
def main(
    path_to_original_dataset='/homedtic/vshenoykadandale/datasets/good-sounds/instruments'
):
    """
    Extract the residual of every sound in a dataset and save it as a WAV file.

    The dataset directory is expected to contain one sub-directory per
    instrument. A sibling directory named 'residuals' is created next to it
    with the same per-instrument layout, and each input file '<name>.<ext>'
    yields '<name>_residual.wav' holding the residual returned by
    SPR.sprModelAnal.

    path_to_original_dataset: path to the original dataset whose residuals need to be separated.
    """
    print("Checkpoint 1")
    path_to_residual_dataset = os.path.join(
        os.path.dirname(path_to_original_dataset), 'residuals')
    _make_residual_dir(path_to_residual_dataset)

    print("Checkpoint 2")
    instruments = sorted(os.listdir(path_to_original_dataset))

    # The analysis settings are the same for every file, so build them once
    # instead of recomputing the window for each sound.
    w = get_window("hamming", 2001)
    N = 2048
    H = 128
    t = -80
    minSineDur = 0.02
    maxnSines = 150
    freqDevOffset = 10
    freqDevSlope = 0.001

    for instrument in instruments:
        instrument_path = os.path.join(path_to_original_dataset, instrument)
        instrument_residual_path = os.path.join(path_to_residual_dataset,
                                                instrument)
        print("Checkpoint 3")
        _make_residual_dir(instrument_residual_path)

        files = os.listdir(instrument_path)
        print("Processing [INSTRUMENT] " + instrument +
              " folder which contains " + str(len(files)) + " files")
        for filename in files:
            file_path = os.path.join(instrument_path, filename)
            # splitext removes the actual extension, whatever its length
            residual_name = os.path.splitext(filename)[0] + '_residual.wav'
            file_residual_path = os.path.join(instrument_residual_path,
                                              residual_name)

            wav = wavio.read(file_path)

            # perform sinusoidal plus residual analysis; only the residual
            # signal is kept
            _, _, _, xr = SPR.sprModelAnal(wav.data, wav.rate, w, N, H, t,
                                           minSineDur, maxnSines,
                                           freqDevOffset, freqDevSlope)

            # keep the sampling rate and sample width of the input file
            wavio.write(file_residual_path, xr, wav.rate,
                        sampwidth=wav.sampwidth)
    print("Successfully extracted residuals from all the audio files!")


def _make_residual_dir(path):
    """Create directory `path` via os.makedirs(path, 0o755) if it does not exist yet."""
    if not os.path.exists(path):
        # Clear the umask so the requested mode is not masked further.
        # NOTE: the umask stays cleared for the rest of the process, exactly
        # as in the original code, so files written later are affected too.
        os.umask(0)
        # 0o755: owner may read/write/enter, everyone else may read/enter
        os.makedirs(path, 0o755)
Exemplo n.º 4
0
def main(inputFile='../../sounds/bendir.wav', window='hamming', M=2001, N=2048, t=-80,
         minSineDur=0.02, maxnSines=150, freqDevOffset=10, freqDevSlope=0.001):
    """
    Run the sinusoidal plus residual model on a sound, save and plot the results.

    The sinusoidal part, the residual and their sum are written as sound files
    under 'output_sounds/'. A three-panel figure is then shown with the input
    sound, the residual spectrogram overlaid with the sinusoidal tracks, and
    the output sound.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    fs, x = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # perform sinusoidal plus residual analysis
    tfreq, tmag, tphase, xr = SPR.sprModelAnal(
        x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope)

    # compute spectrogram of residual (only the first output is plotted)
    mXr, _ = STFT.stftAnal(xr, w, N, H)

    # sum sinusoids and residual
    y, ys = SPR.sprModelSynth(tfreq, tmag, tphase, xr, Ns, H, fs)

    # Build the output names from the file name without its extension;
    # splitext handles extensions that are not exactly three letters long.
    stem = os.path.splitext(os.path.basename(inputFile))[0]
    outputFileSines = 'output_sounds/' + stem + '_sprModel_sines.wav'
    outputFileResidual = 'output_sounds/' + stem + '_sprModel_residual.wav'
    outputFile = 'output_sounds/' + stem + '_sprModel.wav'

    # write sound files for sinusoidal, residual, and the sum
    UF.wavwrite(ys, fs, outputFileSines)
    UF.wavwrite(xr, fs, outputFileResidual)
    UF.wavwrite(y, fs, outputFile)

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    # np.min/np.max reduce in native code instead of a Python-level loop
    plt.axis([0, x.size / float(fs), np.min(x), np.max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the magnitude spectrogram of residual
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    numFrames = int(mXr[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(maxplotbin + 1) * float(fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot the sinusoidal frequencies on top of the residual spectrogram
    if tfreq.shape[1] > 0:
        # zero out values at or above the plotted range, then turn every
        # non-positive entry into NaN so it is not drawn
        tracks = tfreq * np.less(tfreq, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        plt.plot(frmTime, tracks, color='k')
        plt.title('sinusoidal tracks + residual spectrogram')
        plt.autoscale(tight=True)

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), np.min(y), np.max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    # interactive mode is switched on before showing the figure
    plt.ion()
    plt.show()
Exemplo n.º 5
0
def extractHarmSpec(inputFile='../../sounds/bendir.wav',
                    window='hamming',
                    M=2001,
                    N=2048,
                    t=-80,
                    minSineDur=0.02,
                    maxnSines=150,
                    freqDevOffset=10,
                    freqDevSlope=0.001):
    """
    Analyze a sound with the sinusoidal plus residual model, save and plot it.

    The sinusoidal part, the residual and their sum are written as sound files
    under 'output_sounds/'. A three-panel figure is then shown with the input
    sound, the residual spectrogram overlaid with the sinusoidal tracks, and
    the output sound.

    inputFile: input sound file (monophonic with sampling rate of 44100)
    window: analysis window type (rectangular, hanning, hamming, blackman, blackmanharris)
    M: analysis window size
    N: fft size (power of two, bigger or equal than M)
    t: magnitude threshold of spectral peaks
    minSineDur: minimum duration of sinusoidal tracks
    maxnSines: maximum number of parallel sinusoids
    freqDevOffset: frequency deviation allowed in the sinusoids from frame to frame at frequency 0
    freqDevSlope: slope of the frequency deviation, higher frequencies have bigger deviation
    """

    # size of fft used in synthesis
    Ns = 512

    # hop size (has to be 1/4 of Ns)
    H = 128

    # read input sound
    fs, x = UF.wavread(inputFile)

    # compute analysis window
    w = get_window(window, M)

    # perform sinusoidal plus residual analysis
    tfreq, tmag, tphase, xr = SPR.sprModelAnal(
        x, fs, w, N, H, t, minSineDur, maxnSines, freqDevOffset, freqDevSlope)

    # compute spectrogram of residual (only the first output is plotted)
    mXr, _ = STFT.stftAnal(xr, fs, w, N, H)

    # sum sinusoids and residual
    y, ys = SPR.sprModelSynth(tfreq, tmag, tphase, xr, Ns, H, fs)

    # Name the outputs after the input file without its extension; splitext
    # removes the real extension instead of a fixed four characters.
    stem = os.path.splitext(os.path.basename(inputFile))[0]
    outputPrefix = 'output_sounds/' + stem + '_sprModel'

    # write sound files for sinusoidal, residual, and the sum
    UF.wavwrite(ys, fs, outputPrefix + '_sines.wav')
    UF.wavwrite(xr, fs, outputPrefix + '_residual.wav')
    UF.wavwrite(y, fs, outputPrefix + '.wav')

    # create figure to show plots
    plt.figure(figsize=(12, 9))

    # frequency range to plot
    maxplotfreq = 5000.0

    # plot the input sound
    plt.subplot(3, 1, 1)
    plt.plot(np.arange(x.size) / float(fs), x)
    # np.min/np.max reduce in native code instead of a Python-level loop
    plt.axis([0, x.size / float(fs), np.min(x), np.max(x)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('input sound: x')

    # plot the magnitude spectrogram of residual
    plt.subplot(3, 1, 2)
    maxplotbin = int(N * maxplotfreq / fs)
    numFrames = int(mXr[:, 0].size)
    frmTime = H * np.arange(numFrames) / float(fs)
    binFreq = np.arange(maxplotbin + 1) * float(fs) / N
    plt.pcolormesh(frmTime, binFreq, np.transpose(mXr[:, :maxplotbin + 1]))
    plt.autoscale(tight=True)

    # plot the sinusoidal frequencies on top of the residual spectrogram
    if tfreq.shape[1] > 0:
        # zero out values at or above the plotted range, then turn every
        # non-positive entry into NaN so it is not drawn
        tracks = tfreq * np.less(tfreq, maxplotfreq)
        tracks[tracks <= 0] = np.nan
        plt.plot(frmTime, tracks, color='k')
        plt.title('sinusoidal tracks + residual spectrogram')
        plt.autoscale(tight=True)

    # plot the output sound
    plt.subplot(3, 1, 3)
    plt.plot(np.arange(y.size) / float(fs), y)
    plt.axis([0, y.size / float(fs), np.min(y), np.max(y)])
    plt.ylabel('amplitude')
    plt.xlabel('time (sec)')
    plt.title('output sound: y')

    plt.tight_layout()
    plt.show()