def runTest(self):
    """Plot a classic MDCT decomposition next to a TF-masked one.

    The first ``5 * fs`` samples of ``Bach_prelude_40s.wav`` are decomposed
    twice with ``mp.mp`` (200 atoms at most): once on a plain multiscale
    ``Dico`` and once on a ``SpreadDico``.  Both time-frequency maps and both
    residual-energy decay curves are drawn; nothing is asserted.
    """
    name = "orchestra"  # only used by the disabled savefig call below
    scales = [128, 1024, 8192]
    n_atoms = 200

    excerpt = Signal(op.join(audio_filepath, "Bach_prelude_40s.wav"),
                     mono=True, normalize=True)
    excerpt.crop(0, 5 * excerpt.fs)
    excerpt.pad(16384)
    energy = np.sum(excerpt.data ** 2)

    plain_dico = mdct_dico.Dico(scales)
    spread_dico = mdct_dico.SpreadDico(scales, all_scales=True,
                                       penalty=0.1, maskSize=10)

    plain_approx, plain_decay = mp.mp(excerpt, plain_dico, 20, n_atoms)
    spread_approx, spread_decay = mp.mp(excerpt, spread_dico, 20, n_atoms,
                                        pad=False)

    def to_db(decay):
        # residual energy relative to the energy of the padded input, in dB
        return [10 * np.log10(value / energy) for value in decay]

    import matplotlib.pyplot as plt

    plt.figure(figsize=(16, 8))
    panels = (
        (121, plain_approx, "Classic decomposition : 200 atoms 3xMDCT"),
        (122, spread_approx,
         "Decomposition with TF masking: 200 atoms 3xMDCT"),
    )
    for position, approximation, title in panels:
        plt.subplot(position)
        approximation.plot_tf(ylim=[0, 4000])
        plt.title(title)
    # plt.savefig(name + '_TestTFMasking.eps')

    plt.figure()
    plt.plot(to_db(plain_decay))
    plt.plot(to_db(spread_decay), "r")
    plt.legend(("Classic decomposition", "Spreading Atoms"))
    plt.ylabel("Residual energy decay(dB)")
    plt.xlabel("Iteration")
def get_audio(filepath, seg_start, seg_duration, targetfs=None, verbose=True):
    """Read a segment of a wav file, optionally resampling it.

    For use only with wav files from the rwc database.

    Args:
        filepath: path of the audio file.  When it does not exist, the same
            path with a ``.wav`` and then a ``.WAV`` extension is tried.
        seg_start: start of the segment; multiplied by the file sample rate
            to obtain the first frame to read.
        seg_duration: length of the segment; multiplied by the file sample
            rate to obtain the number of frames to read.
        targetfs: when given and different from the file sample rate, the
            data is resampled to this rate through ``Signal.resample``.
        verbose: print progress messages.

    Returns:
        ``(audiodata, fs)`` where ``fs`` is the sample rate of the returned
        data (``targetfs`` after resampling, the file rate otherwise).
    """
    # reading relies on scikits.audiolab
    import scikits.audiolab as audiolab

    # small hack: search for an alternate extension
    if not op.exists(filepath):
        filepath = op.splitext(filepath)[0] + '.wav'
    if not op.exists(filepath):
        filepath = op.splitext(filepath)[0] + '.WAV'

    sndfile = audiolab.Sndfile(filepath, 'r')
    try:
        fs = sndfile.samplerate
        if verbose:
            print("Reading")
        # initialize the read position
        sndfile.seek(int(seg_start * fs), 0, 'r')
        audiodata = sndfile.read_frames(int(seg_duration * fs))
    finally:
        # release the handle even when seeking/reading fails
        # (e.g. a segment beyond the end of the file)
        sndfile.close()
    if verbose:
        print("Done")

    if targetfs is not None and targetfs != fs:
        if verbose:
            print("Resampling")
        sig = Signal(audiodata, fs)
        sig.resample(targetfs)
        audiodata = sig.data
        fs = targetfs
        if verbose:
            print("Done")

    return audiodata, fs
def expe1():
    """Print the fingerprint overlap of shifted excerpts with the first one.

    For every shift (in samples) the test file is cropped to
    ``[shift, shift + L)``, sketched with ``CorticoIHTSketch``, passed through
    ``sparsify(100)`` and fingerprinted.  The printed list holds, for each
    excerpt, the number of non-zero fingerprint positions shared with the
    first excerpt.
    """
    shifts = [0,]  # in samples

    def fingerprint(shift):
        excerpt = Signal(audio_test_file, normalize=True, mono=True)
        excerpt.crop(shift, shift+L)
        sk = sketch.CorticoIHTSketch()
        sk.recompute(excerpt)
        sk.sparsify(100)
        return sk.fgpt()

    fgpts = [fingerprint(shift) for shift in shifts]

    reference_bins = np.flatnonzero(fgpts[0])
    score = [
        len(np.intersect1d(reference_bins, np.flatnonzero(fgpt),
                           assume_unique=True))
        for fgpt in fgpts
    ]
    print(score)
def runTest(self):
    """Plot classic versus TF-masked decompositions of ``glocs.wav``.

    Same comparison as a plain ``Dico`` against a ``SpreadDico`` restricted to
    the 1024 and 8192 scales, followed by a third figure showing, for blocks
    1 and 2 of the spreading dictionary, the mask reshaped as a 2-D image and
    the output of ``im_proj_matrix``.  Nothing is asserted.
    """
    name = "orchestra"  # only used by the disabled savefig calls below
    scales = [128, 1024, 8192]
    n_atoms = 200

    excerpt = Signal(op.join(audio_filepath, "glocs.wav"),
                     mono=True, normalize=True)
    excerpt.crop(0, 5 * excerpt.fs)
    excerpt.pad(16384)
    energy = np.sum(excerpt.data ** 2)

    plain_dico = mdct_dico.Dico(scales, useC=False)
    spread_dico = mdct_dico.SpreadDico(
        scales, all_scales=False, spread_scales=[1024, 8192],
        penalty=0.1, mask_time=2, mask_freq=2)

    plain_approx, plain_decay = mp.mp(excerpt, plain_dico, 20, n_atoms)
    spread_approx, spread_decay = mp.mp(excerpt, spread_dico, 20, n_atoms,
                                        pad=False)

    def to_db(decay):
        # residual energy relative to the energy of the padded input, in dB
        return [10 * np.log10(value / energy) for value in decay]

    plt.figure(figsize=(16, 8))
    panels = (
        (121, plain_approx, "Classic decomposition : 200 atoms 3xMDCT"),
        (122, spread_approx,
         "Decomposition with TF masking: 200 atoms 3xMDCT"),
    )
    for position, approximation, title in panels:
        plt.subplot(position)
        approximation.plot_tf(ylim=[0, 4000])
        plt.title(title)
    # plt.savefig(name + '_TestTFMasking.eps')

    plt.figure()
    plt.plot(to_db(plain_decay))
    plt.plot(to_db(spread_decay), "r")
    plt.legend(("Classic decomposition", "Spreading Atoms"))
    plt.ylabel("Residual energy decay(dB)")
    plt.xlabel("Iteration")
    # plt.savefig(name + '_decayTFMasking.eps')

    plt.figure()
    for block_index in range(1, 3):
        block = spread_dico.blocks[block_index]
        n_cols = block.scale / 2
        n_rows = block.mask.shape[0] / n_cols

        # top row: the mask laid out as (n_rows, n_cols)
        plt.subplot(2, 2, block_index)
        print(" ".join(str(v) for v in (block.mask.shape, n_rows, n_cols)))
        plt.imshow(np.reshape(block.mask, (n_rows, n_cols)),
                   interpolation="nearest", aspect="auto")
        plt.colorbar()

        # bottom row: whatever the block draws for its projection matrix
        plt.subplot(2, 2, block_index + 2)
        block.im_proj_matrix()
        plt.colorbar()
def gen_chirp_sig(freqs, L, fs, octave=2):
    """Build a normalized mono Signal made of a sum of swept sinusoids.

    Args:
        freqs: iterable of start frequencies; each component uses a
            frequency vector going linearly from ``fbase`` to
            ``fbase * 2**octave`` over the ``L`` samples.
        L: number of samples of the generated signal.
        fs: sampling frequency used for the time axis and the Signal.
        octave: exponent of the frequency ratio between end and start.

    Returns:
        ``Signal(data, fs, normalize=True, mono=True)``.
    """
    n_samples = int(L)
    # Build the time axis from an integer ramp: ``np.arange`` with a float
    # step may return L or L + 1 points depending on rounding, which then
    # fails to broadcast against the L-point frequency vector below.
    x = np.arange(n_samples) / float(fs)
    data = np.zeros(x.shape)
    for fbase in freqs:
        f = np.linspace(fbase, fbase * (2 ** octave), n_samples)
        data += np.sin(2.0 * np.pi * f * x)
    return Signal(data, fs, normalize=True, mono=True)
def runTest(self):
    """ Build a corticogram, invert it and compare with the auditory
    spectrogram it was computed from """
    sig = Signal(audio_test_file, mono=True, normalize=True)

    # auditory representation
    gram = cochleo_tools.Cochleogram(sig.data, load_coch_filt=True)
    gram.build_aud()
    reference = np.array(gram.y5).T

    # forward then inverse cortical transform
    cort = cochleo_tools.Corticogram(gram)
    cort.build_cor()
    cort.invert()
    reconstructed = cort.rec

    plt.figure()
    for position, image in ((121, reconstructed), (122, reference)):
        plt.subplot(position)
        plt.imshow(np.abs(image))
    plt.show()

    # rescale (in place) so both representations share the same maximum
    reconstructed *= np.max(reference) / np.max(reconstructed)

    error = np.linalg.norm(np.abs(reference) - np.abs(reconstructed), 'fro')
    print(error)
    self.assertTrue(error < 0.8 * np.linalg.norm(reference))
def expe2():
    """Display two slices of the corticogram of a speech file.

    Three figures are drawn from ``sk.cort.cor``: both slices side by side,
    then each slice above its magnitude summed along one axis.
    """
    # the synthetic chirp is built but immediately replaced by the file below
    sig = gen_chirp_sig([440.0, 512.0], L, fs)
    sig = Signal('/sons/sqam/voicemale.wav', mono=True, normalize=True)
    #sig, c = gen_vibrato_sig([440.0,], L, fs, rate=10,ratio=0.1)

    sk = sketch.CorticoSketch()
    sk.recompute(sig)

    def magnitude(i, j):
        return np.abs(sk.cort.cor[i, j, :, :])

    plt.figure()
    plt.subplot(121)
    plt.imshow(magnitude(-1, 0))
    plt.subplot(122)
    plt.imshow(magnitude(0, -1))
    plt.show()

    plt.figure()
    plt.subplot(211)
    plt.imshow(magnitude(-1, 0))
    plt.subplot(212)
    plt.plot(np.sum(magnitude(-1, 0), axis=0))
    #plt.show()

    plt.figure()
    plt.subplot(211)
    plt.imshow(np.abs(sk.cort.cor[0, -1, :, :].T))
    plt.subplot(212)
    plt.plot(np.sum(magnitude(0, -1), axis=1))
    plt.show()
def runTest(self):
    """Smoke test: run ``_toy2`` then ``invert_y2`` on the test file.

    Passes as long as neither call raises; the inverted data is not checked.
    """
    signal = Signal(audio_test_file, mono=True, normalize=True)
    cochleogram = cochleo_tools.Cochleogram(signal.data)
    cochleogram._toy2()
    rec_data = cochleogram.invert_y2()
def runTest(self):
    """Compare atom selection of a ``SpreadDico`` and a plain ``Dico``.

    With ``penalty=0`` both dictionaries must select the same first atom;
    once that atom is subtracted from the signal, their second selections are
    expected to differ.
    """
    scales = [128, 1024, 8192]

    signal = Signal(op.join(audio_filepath, "glocs.wav"), mono=True)
    signal.crop(0, 5 * signal.fs)
    signal.pad(2048)

    parallelProjections.initialize_plans(np.array(scales),
                                         np.array([2] * len(scales)))

    plain = mdct_dico.Dico(scales)
    spread = mdct_dico.SpreadDico(scales, all_scales=True, penalty=0,
                                  maskSize=3)
    self.assertEqual(spread.mask_times, [3, 3, 3])

    for dictionary in (plain, spread):
        dictionary.initialize(signal)

    def best_atoms():
        # refresh the projections of both dictionaries, then pick their atoms
        plain.update(signal, 2)
        spread.update(signal, 2)
        return plain.get_best_atom(0), spread.get_best_atom(0)

    first_plain, first_spread = best_atoms()
    # print classicAtom1, spreadAtom1
    self.assertEqual(first_plain, first_spread)

    signal.subtract(first_plain)

    second_plain, second_spread = best_atoms()
    self.assertNotEqual(second_plain, second_spread)
def gen_vibrato_sig(freqs, L, fs, octave=2, rate=0.1, ratio=0.1):
    """Build a normalized mono Signal made of frequency-modulated sinusoids.

    Args:
        freqs: iterable of base frequencies; must contain at least one value
            because the second return value is built from the last one.
        L: number of samples of the generated signal.
        fs: sampling frequency used for the time axis and the Signal.
        octave: unused, kept for signature compatibility.
        rate: factor applied to the time axis inside the cosine integral.
        ratio: weight of the cosine-integral term in the frequency vector.

    Returns:
        ``(signal, f * x)`` where ``f`` is the frequency vector of the last
        entry of ``freqs`` and ``x`` the time axis.
    """
    from scipy.special import sici

    # Integer ramp instead of a float-step ``np.arange`` so that the signal
    # always has exactly L samples (a float step may yield L + 1 points).
    x = np.arange(int(L)) / float(fs)
    data = np.zeros(x.shape)

    # merci wolfram -- the modulation only depends on the time axis, so it is
    # computed once for all base frequencies.
    # NOTE(review): x[0] is 0 and the cosine integral is presumably -inf
    # there, which would make the first sample NaN -- confirm and decide
    # whether the time axis should start after 0.
    modulation = 1.0 + ratio * sici(2.0 * np.pi * rate * x)[1]

    for fbase in freqs:
        f = fbase * modulation
        data += np.sin(2.0 * np.pi * f * x)

    return Signal(data, fs, normalize=True, mono=True), f * x
def get_stft(x, wsize=512, tstep=256, sigma=None):
    """Return the squeezed STFT of ``x``, loading the wav file if necessary.

    Args:
        x: either the samples themselves or, when a ``str``, the path of an
            audio file loaded as a mono, normalized ``Signal``.
        wsize: window size forwarded to ``stft.stft``.
        tstep: step size forwarded to ``stft.stft``.
        sigma: when not None, Gaussian noise scaled by ``sigma`` is added to
            the samples before the transform.

    Returns:
        ``np.squeeze(stft.stft(x, wsize, tstep))``.
    """
    if isinstance(x, str):
        x = Signal(x, mono=True, normalize=True).data
    if sigma is not None:
        # Add the noise to a fresh array: an in-place ``+=`` would modify the
        # caller's samples (and is rejected for integer-typed arrays).
        x = x + sigma * np.random.randn(*x.shape)
    return np.squeeze(stft.stft(x, wsize, tstep))
def save_audio(learntype, np, test_file, n_feat, rescale_str, sigout, fs,
               norm_segments=False):
    """Concatenate the output segments and save them as an audio wav.

    Args:
        learntype: label inserted in the output file name.
        np: not used by the body; kept for signature compatibility.  Because
            this parameter hides the module-level numpy alias, numpy is
            imported locally under its full name.
        test_file: path whose last component starts the output file name.
        n_feat: number inserted in the output file name (``%d``).
        rescale_str: label inserted in the output file name.
        sigout: sequence of segments (numpy arrays) to concatenate.
        fs: sampling frequency of the written signal.
        norm_segments: when true, every segment is rescaled in place using
            its own mean energy and the average mean energy of all segments,
            and ``'normed'`` is appended to the file name.
    """
    import numpy  # the ``np`` argument shadows the usual alias in this scope

    norm_str = ''
    if norm_segments:
        norm_str = 'normed'
        mean_energy = numpy.mean(
            [numpy.sum(sig**2) / float(len(sig)) for sig in sigout])
        for sig in sigout:
            # in-place scaling, as callers may reuse the segments afterwards
            sig /= numpy.sum(sig**2) / float(len(sig))
            sig *= mean_energy

    rec_sig = Signal(numpy.concatenate(sigout), fs, normalize=True)
    rec_sig.write('%s/%s_with%s_%dfeats_%s%s.wav' %
                  (outputpath, os.path.split(test_file)[-1], learntype,
                   n_feat, rescale_str, norm_str))
def expe_2():
    """Plot and export two recordings of the same utterance.

    For each of the two files a figure with the waveform on top and the
    spectrogram below is saved as ``voice_<i>_spectro.pdf`` and the
    normalized mono signal is written as ``voice_<i>_spectro.wav``.
    """
    voice_paths = [
        '/sons/voxforge/main/Learn/cmu_us_slt_arctic/wav/arctic_a0372.wav',
        '/sons/voxforge/main/Learn/cmu_us_rms_arctic/wav/arctic_a0372.wav',
    ]
    for i, sig_path in enumerate(voice_paths):
        voice = Signal(sig_path, normalize=True, mono=True)
        #synth_sig.crop(0.1*synth_sig.fs, 3.5*synth_sig.fs)
        #synth_sig.resample(32000)
        time_axis = np.arange(.0, voice.length) / float(voice.fs)

        plt.figure(figsize=(10, 10))

        # waveform
        plt.subplot(211)
        plt.plot(time_axis, voice.data)
        plt.xticks([])
        plt.ylim([-1, 1])
        plt.grid()

        # spectrogram
        plt.subplot(212)
        voice.spectrogram(1024, 64, order=0.5, log=False,
                          cmap=cm.hot, cbar=False)

        plt.savefig(op.join(figure_output_path, 'voice_%d_spectro.pdf' % i))
        voice.write(op.join(audio_output_path, 'voice_%d_spectro.wav' % i))
def runTest(self):
    """Compare atom selection of a ``SpreadBlock`` and a plain ``Block``.

    With ``penalty=0`` both blocks must select the same first atom; after a
    mask is computed around it and the atom is subtracted from the signal,
    their second selections are expected to differ.  The projection plans are
    released even when an assertion fails.
    """
    def show(atom):
        print(" ".join(str(v) for v in (atom.length, atom.frame,
                                        atom.freq_bin, atom.mdct_value)))

    pySig = Signal(op.join(audio_filepath, "glocs.wav"), mono=True)
    pySig.crop(0, 5 * pySig.fs)
    pySig.pad(2048)

    scale = 1024
    parallelProjections.initialize_plans(np.array([scale]), np.array([2]))
    try:
        classicBlock = mdct_block.Block(scale, pySig, 0, debug_level=3)
        spreadBlock = mdct_block.SpreadBlock(scale, pySig, 0, debug_level=3,
                                             penalty=0, maskSize=5)

        # compute the projections, should be equivalent
        classicBlock.update(pySig, 0, -1)
        spreadBlock.update(pySig, 0, -1)

        maxClassicAtom1 = classicBlock.get_max_atom()
        show(maxClassicAtom1)
        maxSpreadcAtom1 = spreadBlock.get_max_atom()
        show(maxSpreadcAtom1)
        # assert equality using the inner comparison method of MDCT atoms
        self.assertEqual(maxClassicAtom1, maxSpreadcAtom1)

        # verifying the masking index construction
        mask_frame_width = 2
        mask_bin_width = 1
        spreadBlock.compute_mask(maxSpreadcAtom1, mask_bin_width,
                                 mask_frame_width, 0.5)

        # expected mask indexes; only needed by the disabled checks below
        c_frame = int(np.ceil(maxSpreadcAtom1.time_position / (scale / 2)))
        c_bin = int(maxSpreadcAtom1.reduced_frequency * scale)
        z1 = np.arange(int(c_frame - mask_frame_width),
                       int(c_frame + mask_frame_width) + 1)
        z2 = np.arange(int(c_bin - mask_bin_width),
                       int(c_bin + mask_bin_width) + 1)
        # x, y = np.meshgrid(z1, z2)
        # print spreadBlock.mask_index_x
        # np.testing.assert_array_equal(spreadBlock.mask_index_x, z1)
        # np.testing.assert_array_equal(spreadBlock.mask_index_y, z2)

        pySig.subtract(maxSpreadcAtom1)

        # recompute the projections
        classicBlock.update(pySig, 0, -1)
        spreadBlock.update(pySig, 0, -1)
        # plt.show()

        maxClassicAtom2 = classicBlock.get_max_atom()
        show(maxClassicAtom2)
        maxSpreadcAtom2 = spreadBlock.get_max_atom()
        show(maxSpreadcAtom2)

        self.assertNotEqual(maxClassicAtom2, maxSpreadcAtom2)
    finally:
        # always release the plans, including when an assertion fails
        parallelProjections.clean_plans()
def save_audio_full_ref(learntype, test_file, n_feat, rescale_str, sigout, fs,
                        norm_segments=False):
    """ Overlap-add the segments at their start index, without cutting them,
    and write the result as a wav file.

    ``sigout`` is a sequence of ``(segment, start index)`` pairs; the output
    buffer ends ``4 * fs`` samples after the end of the last pair.
    ``norm_segments`` is accepted but not used.
    """
    # first pass for total length
    last_segment, last_start = sigout[-1][0], sigout[-1][1]
    max_idx = int(last_start + len(last_segment)) + 4 * fs
    print(" ".join(["total length of ", str(max_idx)]))

    sig_data = np.zeros((max_idx, ))
    for (segment, startidx) in sigout:
        first = int(startidx)
        sig_data[first:first + segment.shape[0]] += segment

    rec_sig = Signal(sig_data, fs, normalize=True)
    basename = os.path.split(test_file)[-1]
    rec_sig.write('%s/%s_with%s_%dfeats_%s%s.wav' %
                  (outputpath, basename, learntype, n_feat, rescale_str,
                   'full_ref'))
def runTest(self):
    """Build, plot and invert the auditory spectrogram of the test file.

    A signal loaded without the ``mono`` option must make ``build_aud`` raise
    ``NotImplementedError``.  The mono version is then transformed, plotted
    and inverted with 10 iterations; the elapsed time is printed and the
    smallest squared error up to a sign flip is computed (not asserted).
    """
    # test bad call
    raw = Signal(audio_test_file)
    bad_gram = cochleo_tools.Cochleogram(raw.data, load_coch_filt=True)
    self.assertRaises(NotImplementedError, bad_gram.build_aud)

    sig = Signal(audio_test_file, mono=True, normalize=True)
    gram = cochleo_tools.Cochleogram(sig.data, load_coch_filt=True)
    gram.build_aud()
    gram.plot_aud()

    t = time.clock()
    init_rec_data = gram.init_inverse()
    rec_data = gram.invert(init_vec=init_rec_data, nb_iter=10, display=False)
    print(" ".join(["Elapsed :", str(time.clock() - t)]))

    # the inversion may come out with a flipped sign: test both
    target = sig.data[0:rec_data.shape[0]]
    min_error = min(np.sum((rec_data - target)**2),
                    np.sum((rec_data + target)**2))
def recons_save_fig_audio(magspec, target_name, n_max_frames, fs=22050,
                          format=(8, 3), nb_gl_iter=30):
    """Resynthesize a magnitude spectrogram and save both audio and figure.

    The first ``n_max_frames`` columns of ``magspec`` are passed to
    ``transforms.gl_recons`` (``nb_gl_iter`` iterations, random start vector).
    The result is written to ``<target_name>.wav`` in ``output_audio_path``
    and its spectrogram to ``<target_name>.png`` in ``output_fig_path``, on a
    figure of size ``format``.
    """
    wsize, tstep = 512, 128

    start_vector = np.random.randn(tstep * n_max_frames)
    x_recon = transforms.gl_recons(magspec[:, :n_max_frames], start_vector,
                                   nb_gl_iter, wsize, tstep, display=False)

    rec_sig = Signal(x_recon, fs, normalize=True)
    rec_sig.write(os.path.join(output_audio_path, '%s.wav' % target_name))

    plt.figure(figsize=format)
    rec_sig.spectrogram(wsize, tstep, order=1, log=True, cmap=cm.jet,
                        cbar=False)
    plt.savefig(os.path.join(output_fig_path, '%s.png' % target_name))
def expe_1():
    """Plot the waveform and spectrogram of an excerpt of the test file.

    The excerpt spans ``0.1 * fs`` to ``3.5 * fs``; the figure is saved as
    ``glocs_spectro.pdf`` in ``figure_output_path`` and then shown.
    """
    excerpt = Signal(audio_test_file, normalize=True, mono=True)
    excerpt.crop(0.1 * excerpt.fs, 3.5 * excerpt.fs)
    #synth_sig.resample(32000)
    time_axis = np.arange(.0, excerpt.length) / float(excerpt.fs)

    plt.figure(figsize=(10, 5))

    # waveform
    plt.subplot(211)
    plt.plot(time_axis, excerpt.data)
    plt.xticks([])
    plt.ylim([-1, 1])
    plt.grid()

    # spectrogram
    plt.subplot(212)
    excerpt.spectrogram(1024, 64, order=0.25, log=False,
                        cmap=cm.hot, cbar=False)

    plt.savefig(op.join(figure_output_path, 'glocs_spectro.pdf'))
    plt.show()
# Matplotlib defaults used by the figures of this example
for _rc_name, _rc_value in (('font.size', 16.0),
                            ('legend.fancybox', True),
                            ('legend.shadow', True),
                            ('image.interpolation', 'Nearest')):
    mpl.rcParams[_rc_name] = _rc_value
del _rc_name, _rc_value
#mpl.rcParams['text.usetex'] = True

from PyMP import Signal, mp
from PyMP.mdct import Dico

sizes = [128, 1024, 8192]  # MDCT scales of the dictionary
n_atoms = 1000             # maximum number of atoms handed to mp.mp

abPath = os.path.abspath('../../data/')
sig = Signal(abPath + '/glocs.wav', mono=True, normalize=True)

# taking only the first musical phrase (3.5 seconds approximately)
sig.crop(0, 3.5 * sig.fs)
sig.pad(8192)

# add some minor noise to avoid null areas
sig.data += 0.0001 * np.random.randn(sig.length)

# create MDCT multiscale dictionary
dico = Dico(sizes)

# run the MP routine
approx, decay = mp.mp(sig, dico, 50, n_atoms)

# plotting the results
import matplotlib.pyplot as plt
import os
from PyMP import Signal, mp, mp_coder
from PyMP.mdct import Dico

abPath = os.path.abspath("../../data/")

# Load the signal and keep only 4 seconds of it
sig = Signal(abPath + "/ClocheB.wav", mono=True)
sig.crop(0, 4.0 * sig.fs)

# atoms of scales 8, 64 and 512 ms, expressed in samples
scales = [(s * sig.fs / 1000) for s in (8, 64, 512)]

# Dictionary for Standard MP
pyDico = Dico(scales)

# Launching decomposition, stops either at 20 dB of SRR or 2000 iterations
mpApprox, mpDecay = mp.mp(sig, pyDico, 20, 2000)

# mpApprox.atomNumber
# Encode the approximation, then display the quantized version
SNR, bitrate, quantizedApprox = mp_coder.simple_mdct_encoding(
    mpApprox, 2000, Q=14)

quantizedApprox.plot_tf()
plt.show()
def runTest(self):
    ''' Reload the previously built database and retrieve the index of a
    song from a short excerpt of it.

    The excerpt is decomposed with mp.mp, the database is queried atom by
    atom to fill a local histogram, and the result of ppdb.retrieve must
    designate the right file with an offset within 2.5 of the true one.
    '''
    print("------------------ Test6 recognition ---------")
    nbCandidates = 8
    ppdb = STFTPeaksBDB('LargeSTFTdb.db', load=True)
    print('Large Db of ' + str(ppdb.get_stats()['nkeys']) + ' and ' + str(
        ppdb.get_stats()['ndata']))

    # Now take a song, decompose it and try to retrieve it
    fileIndex = 6
    RandomAudioFilePath = file_names[fileIndex]
    print('Working on ' + str(RandomAudioFilePath))
    pySig = Signal(op.join(audio_files_path, RandomAudioFilePath), mono=True)
    pyDico = LODico(sizes)

    segDuration = 5
    offsetDuration = 7
    nbAtom = 50
    offset = offsetDuration * pySig.fs
    # segment length rounded to a multiple of the largest scale
    segmentLength = ((segDuration * pySig.fs) / sizes[-1]) * sizes[-1]
    pySig.crop(offset, offset + segmentLength)

    approx, decay = mp.mp(pySig, pyDico, 40, nbAtom, pad=True)
    # plt.figure()
    # approx.plotTF()
    # plt.show()

    # query the base with each atom key and its padding-corrected position
    atom_keys = [ppdb.kform(atom) for atom in approx.atoms]
    atom_times = [(a.time_position - pyDico.get_pad()) / approx.fs
                  for a in approx.atoms]
    res = [ppdb.get(key, position)
           for key, position in zip(atom_keys, atom_times)]
    # #res = map(bdb.get, map(bdb.kform, approx.atoms))

    histogram = np.zeros((600, nbCandidates))
    for i in range(approx.atom_number):
        hit = res[i]
        print(hit)
        histogram[hit] += 1

    max1 = np.argmax(histogram[:])
    Offset1 = max1 / nbCandidates
    estFile1 = max1 % nbCandidates

    # candidates , offsets = ppdb.retrieve(approx);
    # print approx.atom_number
    histograms = ppdb.retrieve(approx, offset=0, nbCandidates=8)
    # print histograms , np.max(histograms) , np.argmax(histograms, axis=0) ,
    # np.argmax(histograms, axis=1)
    # plt.figure()
    # plt.imshow(histograms[0:20,:],interpolation='nearest')
    # plt.show()

    maxI = np.argmax(histograms[:])
    OffsetI = maxI / nbCandidates
    estFileI = maxI % nbCandidates
    print(" ".join(str(v) for v in (fileIndex, offsetDuration, estFileI,
                                    OffsetI, estFile1, Offset1, max1, maxI)))

    import matplotlib.pyplot as plt
    # plt.figure(figsize=(12,6))
    # plt.subplot(121)
    # plt.imshow(histograms,aspect='auto',interpolation='nearest')
    # plt.subplot(122)
    # plt.imshow(histogram,aspect='auto',interpolation='nearest')
    ## plt.imshow(histograms,aspect='auto',interpolation='nearest')
    ## plt.colorbar()
    # plt.show()

    print(" ".join(str(v) for v in (maxI, OffsetI, estFileI)))
    self.assertEqual(histograms[OffsetI, estFileI], np.max(histograms))
    self.assertEqual(fileIndex, estFileI)
    self.assertTrue(abs(offsetDuration - OffsetI) <= 2.5)
learn_magspecs_all = lstruct['learn_magspecs_all'] learn_files = lstruct['learn_files'] add_col_str = 'add_%s_col' % learn_magspecs_all.shape[0] learn_feats = np.concatenate((learn_feats, learn_feats_all)) learn_magspecs = np.concatenate((learn_magspecs, learn_magspecs_all)) test_feats = Feats[start_t_frame:start_t_frame + nb_test_frames, :] test_magspecs = Feats[start_t_frame:start_t_frame + nb_test_frames, :] learn_sample = learn_ratio * Datas.shape[0] start_l_sample = start_l_ratio * Datas.shape[0] test_sample = test_ratio * Datas.shape[0] start_t_sample = start_t_ratio * Datas.shape[0] ref_learn_data = Datas[start_l_sample:start_l_sample + learn_sample] sig_learn_ref = Signal(ref_learn_data, sr) ref_test_data = Datas[start_t_sample:start_t_sample + test_sample] sig_test_ref = Signal(ref_test_data, sr) nb_median = 5 nb_iter_gl = 20 l_medfilt = 1 params = {} params['win_size'] = int(wintime * sr) params['step_size'] = int(steptime * sr) res_array = regression.eval_knn(learn_feats, learn_magspecs, test_feats, test_magspecs, ref_test_data, nb_median, nb_iter_gl, l_medfilt, params) output_path = '/home/manu/workspace/audio-sketch/src/results/'
for c in range(n_distance.shape[0]): print "Cand %d: " % c, learn_feats[cands[c], 12:15], distance[c] cand_DeltaL = learn_feats[cands[c], 13] - learn_feats[cands[c], 12] n_distance[c] += lambda_L * np.abs(cand_DeltaL - DeltaL) if cand_DeltaL < thresh_lambda and forceAttack: n_distance[c] = 0 b_c = np.argmin(n_distance) print "New best candidate is %d score of %1.4f" % ( b_c, n_distance[b_c]), n_distance return cands[b_c], n_distance[b_c] # load the audio data and the features audio_file_path = '/sons/rwc/Learn/rwc-g-m01_1.wav' output_path = '/home/manu/workspace/audio-sketch/src/results/audio' orig_sig = Signal(audio_file_path) test_file = 'rwc-g-m01_1' h5_file_path = '/sons/rwc/Learn/hdf5/rwc-g-m01_1.h5' feats = [] segs = [] get_ten_features_from_file(feats, segs, [], h5_file_path) # plot part of the audio and teh segmentation seg_starts = segs[0][0] seg_duration = np.diff(seg_starts) nseg = 100 max_time = seg_starts[nseg] + seg_duration[nseg] fs = orig_sig.fs
def runTest(self):
    """Populate a peak-pair database from one excerpt and query it back.

    A key is built by hand from one peak of the sparse fingerprint and a
    neighbour found in its target zone.  After populating the base, the test
    checks the number of stored entries, that the hand-built key returns the
    right file at roughly the right time, and that an unknown key returns
    empty lists.
    """
    print("------------------ Test3 Populate from a true pair of peaks ---------")
    fileIndex = 2
    RandomAudioFilePath = file_names[fileIndex]
    print('Working on %s' % RandomAudioFilePath)

    sizes = [2**j for j in range(7, 15)]
    segDuration = 5
    nbAtom = 20
    pySig = Signal(op.join(audio_files_path, RandomAudioFilePath),
                   mono=True, normalize=True)
    segmentLength = ((segDuration * pySig.fs) / sizes[-1]) * sizes[-1]
    nbSeg = floor(pySig.length / segmentLength)
    # cropping
    pySig.crop(0, segmentLength)

    # create the sparsified matrix of peaks
    # the easiest is to use the existing PeakPicking in sketch
    from classes import sketch
    sk = sketch.STFTPeaksSketch()
    sk.recompute(pySig)
    sk.sparsify(100)
    fgpt = sk.fgpt(sparse=True)

    ppdb = STFTPeaksBDB('STFTPeaksdb.db', load=False)
    # ppdb.keyformat = None

    # compute the pairs of peaks: take the peak stored at position 2
    peak_rows, peak_cols = np.nonzero(fgpt[0, :, :])[:2]
    peak_ind = (peak_rows[2], peak_cols[2])
    f_target_width = 2 * sk.params['f_width']
    t_target_width = 2 * sk.params['t_width']

    # target zone anchored on the selected peak
    target_zone = fgpt[0,
                       peak_ind[0]:peak_ind[0] + f_target_width,
                       peak_ind[1]:peak_ind[1] + t_target_width]

    import matplotlib.pyplot as plt
    plt.figure()
    plt.imshow(np.log(np.abs(target_zone)))
    target_points_i, target_points_j = np.nonzero(target_zone)

    # now we can build a pair of peaks , and thus a key
    sample_rate = pySig.fs
    f1 = (float(peak_ind[0]) / sk.params['scale']) * sample_rate
    f2 = (float(peak_ind[0] + target_points_i[1]) /
          sk.params['scale']) * sample_rate
    delta_t = float(target_points_j[1] * sk.params['step']) / float(
        sample_rate)
    t1 = float(peak_ind[1] * sk.params['step']) / float(sample_rate)
    key = (f1, f2, delta_t)
    print(" ".join([str(key), str(t1)]))

    ppdb.populate(sk.fgpt(), sk.params, fileIndex)
    nKeys = ppdb.get_stats()['ndata']
    # compare the number of keys in the base to the number of atoms
    # print ppdb.get_stats()
    self.assertEqual(nKeys, 116)

    # now try to recover the fileIndex knowing one key
    T, fileI = ppdb.get(key)
    self.assertEqual(fileI[0], fileIndex)
    Tpy = np.array(T)
    print(Tpy)
    self.assertTrue((np.abs(Tpy - t1)).min() < 0.5)

    # last check: what does a request for non-existing atom in base return?
    T, fileI = ppdb.get((11, 120.0, 0.87))
    self.assertEqual(T, [])
    self.assertEqual(fileI, [])

    # now let's just retrieve the atoms from the base and see if they are
    # the same
    histograms = ppdb.retrieve(fgpt, sk.params)
    # plt.figure()
    # plt.imshow(histograms[0:10,:])
    # plt.show()
    del ppdb
filter_key= None, t_name=genre, n_learn_max = 1000) # Recover the dev data in Solo Piano case l_feats_piano, l_segments_piano, n_learn_piano = get_learns_multidir(solo_piano_dirs, filter_key= None, t_name=genre, n_learn_max = 1000) # Loading the reference and Ellis reconstruction print t_path + '/' + t_name + '.au' orig, fs = get_audio(t_path + '/' + t_name + '.au', 0, target_duration, targetfs=22050) sig_orig = Signal(orig, fs, normalize=True) #sig_orig.write(op.join(recons_audio_path, '_original.wav')) save_fig_audio(sig_orig, recons_audio_path, recons_fig_path, "original") print "Working on %s duration of %2.2f"%(t_name, np.sum(t_seg_duration[:nb_max_seg])) orig_spec = np.abs(stft.stft(orig, 512,128)[0,:,:]) Lmin = orig_spec.shape[1] sig_ellis = Signal('%sellis_resynth%s.wav'%(output_audio_path,t_name), normalize=True) #sig_ellis.write(op.join(recons_audio_path, '_ellisrec.wav')) save_fig_audio(sig_ellis, recons_audio_path, recons_fig_path, "ellisrec")
print "Loading ", filepath signalin, fs = get_audio(filepath, ref_audio_start, ref_audio_duration) target_length = target_audio_duration*fs print "Loaded %s length of %d "%( filepath, len(signalin)) print "Stretching to %2.2f"%target_length # adjust the Loudness ? if rescale: rescale_str = 'normed' signalin = signalin.astype(float) signalin /= 8192.0 signalin /= np.max(signalin) # N = float(len(signalin)) # target_loudness = test_feats[test_seg_idx, 13] # adjust = target_loudness - 10*np.log10((1.0/N)*np.sum(signalin**2)) # signalin *= 10**(adjust/10.) signalin *= 8192.0 signalin = signalin.astype(np.int16) sigout[num_neigh].append(time_stretch(signalin, tscale, wsize=1024, tstep=128)[128:-1024]) for num_neigh in range(n_neighbs): rec_sig = Signal(np.concatenate(sigout[num_neigh]), fs, normalize=True) rec_sig.write('/home/manu/workspace/audio-sketch/src/results/audio/%s_with%s_%dfeats_%s_neighbor_%d.wav'%( os.path.split(test_file)[-1], learntype, n_feat, rescale_str, num_neigh))
synth_sig.spectrogram(1024, 64, order=0.5, log=False, cmap=cm.hot, cbar=False) plt.savefig(op.join(figure_output_path, 'voice_%d_spectro.pdf' % i)) synth_sig.write(op.join(audio_output_path, 'voice_%d_spectro.wav' % i)) i += 1 sig_1_path = '/sons/voxforge/main/Learn/cmu_us_slt_arctic/wav/arctic_a0372.wav' sk = sketch.STFTPeaksSketch(**{'scale': 256, 'step': 128}) sk2 = sketch.STFTPeaksSketch(**{'scale': 4096, 'step': 512}) sk.recompute(Signal(sig_1_path, mono=True)) sk2.recompute(Signal(sig_1_path, mono=True)) sk.sparsify(1000) sk2.sparsify(1000) sparse_sig = sk.synthesize(sparse=True) sparse_sig2 = sk2.synthesize(sparse=True) plt.figure() #plt.subplot(211) sparse_sig.spectrogram(256, 128, order=0.5, log=False, cmap=cm.hot, cbar=False) plt.savefig(op.join(figure_output_path, 'STFTPeaks_voice_256.pdf')) sparse_sig.write(op.join(audio_output_path, 'STFTPeaks_voice_256.wav')) plt.figure() sparse_sig2.spectrogram(256, 128, order=0.5,
def find_indexes(startIdx, array, stopvalue):
    """ collect the consecutive indexes, starting at startIdx, whose elements
    in the (sorted) array are smaller than stopvalue; the scan stops at the
    first element that is not, or at the end of the array """
    idxset = []
    for idx in range(startIdx, array.shape[0]):
        if not array[idx] < stopvalue:
            break
        idxset.append(idx)
    return idxset


original = Signal(audiofile, mono=True)

max_duration = 20  # in seconds
original.crop(0, max_duration * original.fs)

wsize = 1024
tstep = 512

# Get the magnitude spectrum for the given audio file
learn_specs = features.get_stft(original.data, wsize, tstep)
learn_specs = learn_specs.T

# Read the features in the h5 file
h5 = hdf5_getters.open_h5_file_read(h5file)
timbre = hdf5_getters.get_segments_timbre(h5)
loudness_start = hdf5_getters.get_segments_loudness_start(h5)
C = hdf5_getters.get_segments_pitches(h5)
plt.show() ############## DEBUG Part # why do we have some Nans ? #scores = do_feat_invert_test1(1, # 100000, 20, 0.032, # 16000, [5,], # [5], test_filepath) l_specs, l_feats = load_learned_database(50000, 1, 0.032, 7) knn = NearestNeighbors(n_neighbors=3) knn.fit(l_feats) t_specs, t_feats, t_data = load_test_datas(test_filepath, 0.032, 7) distance, neighbs = knn.kneighbors(t_feats, n_neighbors=5, return_distance=True) x_recon = reconstruct(l_specs, t_specs, neighbs, 5, int(0.032 * 16000), 10) sti.stiFromAudio(t_data, x_recon, 16000, calcref=False, downsample=None, name="unnamed") sig = Signal(x_recon, 16000, normalize=True) #### the best is: np.unravel_index(np.argmax(masked_sti_scores), masked_sti_scores.shape)
import numpy as np
from PyMP import Signal, mp
from PyMP.mdct.dico import Dico, LODico
from PyMP.mdct.atom import Atom
from PyMP.tools import mdct

print("Running MP, OMP and local versions on synthetic k-sparse")

scales = [16, 64, 256]
dico = Dico(scales)
M = len(scales)
L = 256 * 4
# Number of non-zero coefficients.  It must be an integer: it is used as a
# slice bound below and as the number of atoms handed to mp.greedy, and
# recent numpy versions reject float slice bounds.
k = int(0.2 * L)

# create a k-sparse signal: k random positions of the concatenated MDCT
# coefficient vector receive Gaussian weights
sp_vec = np.zeros(M*L,)
random_indexes = np.arange(M*L)
np.random.shuffle(random_indexes)
random_weights = np.random.randn(M*L)
sp_vec[random_indexes[0:k]] = random_weights[0:k]

# synthesize the time-domain signal, one scale at a time
sparse_data = np.zeros(L,)
for m in range(M):
    sparse_data += mdct.imdct(sp_vec[m*L:(m+1)*L], scales[m])

signal_original = Signal(sparse_data, Fs=8000, mono=True, normalize=False)
signal_original.data += 0.01 * np.random.random(signal_original.length,)

n_atoms = k
signal_original.pad(dico.get_pad())

# same signal and dictionary, three update rules
app_2, dec2 = mp.greedy(signal_original, dico, 100, n_atoms,
                        debug=0, pad=False, update='locgp')
app_1, dec1 = mp.greedy(signal_original, dico, 100, n_atoms,
                        debug=0, pad=False, update='mp')
app_3, dec3 = mp.greedy(signal_original, dico, 100, n_atoms,
                        debug=0, pad=False, update='locomp')
'''
doc.pyplots.Spectro_example  -
Created on Apr 23, 2013
@author: M. Moussallam
'''
import os.path as op
from PyMP import Signal, approx
import matplotlib.pyplot as plt
import matplotlib.cm as cm

# Load the glockenspiel recording as a normalized mono signal
abPath = op.abspath('../../data/')
sig = Signal(op.join(abPath, 'glocs.wav'), normalize=True, mono=True)

# Draw its spectrogram on a new figure, with a colorbar
plt.figure()
sig.spectrogram(1024, 128, order=2, log=True, cmap=cm.hot, cbar=True)
plt.show()
import matplotlib.pyplot as plt
from PyMP import Signal, mp
from PyMP.mdct import Dico, LODico
import matplotlib as mpl

# Matplotlib defaults used by the figures of this example
for _rc_name, _rc_value in (('lines.linewidth', 1.0),
                            ('font.size', 16.0),
                            ('legend.fancybox', True),
                            ('legend.shadow', True),
                            ('image.interpolation', 'Nearest')):
    mpl.rcParams[_rc_name] = _rc_value
del _rc_name, _rc_value
#mpl.rcParams['text.usetex'] = True

# Load glockenspiel signal
abPath = os.path.abspath('../../data/')
sig = Signal(abPath + '/glocs.wav', mono=True, normalize=True)
sig.crop(0, 3 * sig.fs)

scales = [128, 1024, 8192]
n_atoms = 500
srr = 30

# one dictionary per algorithm
mp_dico = Dico(scales)
lomp_dico = LODico(scales)

# the first call pads the signal, the second one is told not to pad again
mp_approx, mp_decay = mp.mp(sig, mp_dico, srr, n_atoms, pad=True)
lomp_approx, lomp_decay = mp.mp(sig, lomp_dico, srr, n_atoms, pad=False)

plt.figure()
plt.subplot(211)
output_audio_path = '/home/manu/Documents/Articles/ISMIR2013/ListeningMSD/Audio/' output_fig_path = '/home/manu/Documents/Articles/ISMIR2013/ListeningMSD/Figures/' colormap = cm.jet format = (8,3) # also load the Dan Ellis's synthesized version # The Piano cross-synthesis and the Viterbi smoothed Musaicing? # resynthesize using the first N frames n_max_frames = 900 nb_gl_iter = 30 init_vec = np.random.randn(128*n_max_frames) x_recon_median = transforms.gl_recons(median_magspec[:,:n_max_frames], init_vec, nb_gl_iter, 512, 128, display=False) sig_median = Signal(x_recon_median, 22050,normalize=True) sig_median.write(os.path.join(output_audio_path, '%s_add_median.wav'%t_name)) plt.figure(figsize=format) sig_median.spectrogram(512, 128, order=1, log=True, cmap=colormap, cbar=False) plt.savefig(os.path.join(output_fig_path, '%s_add_median.png'%t_name)) init_vec = np.random.randn(128*n_max_frames) x_recon_orig = transforms.gl_recons(orig_spec[:,:n_max_frames], init_vec, nb_gl_iter, 512, 128, display=False) sig_orig= Signal(x_recon_orig, 22050,normalize=True) sig_orig.write(os.path.join(output_audio_path, '%s_original.wav'%t_name)) plt.figure(figsize=format) sig_orig.spectrogram(512, 128, order=1, log=True, cmap=colormap, cbar=False) plt.savefig(os.path.join(output_fig_path, '%s_original.png'%t_name)) init_vec = np.random.randn(128*n_max_frames)
## Initialize the sketchifier #sk = STFTPeaksSketch(**{'scale':2048, 'step':512}) sk = CorticoIndepSubPeaksSketch(**{ 'fs': fs, 'downsample': fs, 'frmlen': 8, 'shift': 0, 'fac': -2, 'BP': 1 }) #sk = CochleoPeaksSketch(**{'fs':fs,'step':512,'downsample':fs}) sk_id = sk.__class__.__name__[:-6] # initialize the sketch on noise sk.recompute(Signal(np.random.randn(seg_dur * fs), fs, mono=True)) (N, M) = sk.cort.cor.shape[:2] sizes = np.zeros((N, M / 2, len(sparsities))) nkeys = np.zeros((N, M / 2, len(sparsities))) scores = np.zeros((N, M / 2, len(sparsities))) cons_scores = np.zeros((N, M / 2, len(sparsities))) times = [] for sp_ind, sparsity in enumerate(sparsities): # we just need a short adaptation sk.sparsify(sparsity) sc_name = "%s_%s_k%d_%s_%dsec_%dfs_test%d_step%d.mat" % ( set_id, sk_id, sparsity, sk.get_sig(), int(seg_dur), int(fs),
"""

Tutorial provided as part of PyMP

M. Moussallam

"""
from PyMP.mdct import Dico, LODico
from PyMP.mdct.rand import SequenceDico
from PyMP import mp, mp_coder, Signal

# Load the signal and keep only 4 seconds of it
signal = Signal('../data/ClocheB.wav', mono=True)
signal.crop(0, 4.0 * signal.fs)

# atoms of scales 8, 64 and 512 ms, expressed in samples
scales = [(s * signal.fs / 1000) for s in (8, 64, 512)]

# pad with the largest scale so that mp.mp can be called with pad=False
signal.pad(scales[-1])

# Dictionary for Standard MP
dico = Dico(scales)

# Launching decomposition, stops either at 20 dB of SRR or 2000 iterations
app, dec = mp.mp(signal, dico, 20, 2000, pad=False)

app.atom_number

# encode the same approximation with two values of Q and print
# (snr, bitrate) for each
snr, bitrate, quantized_app = mp_coder.simple_mdct_encoding(
    app, 8000, Q=14)
print((snr, bitrate))

print("With Q=5")
snr, bitrate, quantized_app = mp_coder.simple_mdct_encoding(
    app, 8000, Q=5)
print((snr, bitrate))
def expe_1_synth_from_same_sample():
    """Resynthesize an excerpt of a track from segments of the same track.

    Everything runs on the hard-coded RWC item ``rwc-g-m01_1``:

    1. Read the per-segment features (timbre, loudness start, pitches) and
       the segment start times from the matching HDF5 file.
    2. Compute the STFT of the first ``learn_duration`` seconds and record,
       for each learning segment, the range of STFT frames it spans.
    3. For each segment of the test excerpt, find its single nearest
       learning segment in feature space.
    4. Hand the selected segments to ``spec_morph`` to build a magnitude
       spectrogram, plot it next to the true one (``plt.show()`` blocks),
       reconstruct a waveform with ``transforms.gl_recons`` and write it to
       ``/sons/tests/rec_sig2.wav``.

    Takes no argument and returns nothing; its results are the figure and
    the written wav file.
    """
    input_dir = '/sons/rwc/Learn/'
    output_dir = '/sons/rwc/Learn/hdf5/'
    audiofile = input_dir + 'rwc-g-m01_1.wav'
    h5file = output_dir + 'rwc-g-m01_1.h5'

    # load the Echo Nest features; the handle is released even if a getter
    # raises.  Assumes the getters return in-memory arrays that stay valid
    # after the file is closed -- TODO confirm against hdf5_getters.
    h5 = hdf5_getters.open_h5_file_read(h5file)
    try:
        timbre = hdf5_getters.get_segments_timbre(h5)
        loudness_start = hdf5_getters.get_segments_loudness_start(h5)
        C = hdf5_getters.get_segments_pitches(h5)
        segments_all = hdf5_getters.get_segments_start(h5)
    finally:
        h5.close()

    # One row per segment: timbre | loudness start (as a column) | pitches
    loudness_col = loudness_start.reshape((loudness_start.shape[0], 1))
    learn_feats_all = np.hstack((timbre, loudness_col, C))

    # Ok That was the best possible case, now let us try to find the nearest
    # neighbors, get the segment back and resynthesize!
    learn_duration = 200  # in seconds
    test_start = 200
    test_duration = 5

    # Get learning data
    learning = Signal(audiofile, mono=True)
    learning.crop(0, learn_duration * learning.fs)

    wsize = 1024
    tstep = 512
    # Get the magnitude spectrum for the given audio file
    learn_specs = features.get_stft(learning.data, wsize, tstep)
    learn_specs = learn_specs.T

    # Keep the segments starting before the end of the learning excerpt
    max_l_seg_idx = np.where(segments_all < learn_duration)[0][-1]
    l_segments = segments_all[:max_l_seg_idx]
    learn_feats = learn_feats_all[:max_l_seg_idx, :]

    # we must keep in mind for each segment index, the corresponding indices
    # in the learn_spec mat
    l_seg_bounds = []
    ref_time = np.arange(0.,
                         float(learning.length) / float(learning.fs),
                         float(tstep) / float(learning.fs))
    for segI in range(len(l_segments) - 1):
        startIdx = np.where(ref_time > l_segments[segI])[0][0]
        endIdx = np.where(ref_time > l_segments[segI + 1])[0][0]
        l_seg_bounds.append((startIdx, endIdx))
    # The last segment runs from the last boundary to the end of ref_time
    l_seg_bounds.append((endIdx, ref_time.shape[0]))

    # Get testing data (crop bounds use the test signal's own sample rate)
    testing = Signal(audiofile, mono=True)
    testing.crop(test_start * testing.fs,
                 (test_start + test_duration) * testing.fs)

    # get the testing features
    min_t_seg_idx = np.where(segments_all < test_start)[0][-1]
    max_t_seg_idx = np.where(
        segments_all < test_start + test_duration)[0][-1]
    t_segments = segments_all[min_t_seg_idx:max_t_seg_idx]
    t_segment_lengths = (t_segments[1:] - t_segments[0:-1]) * testing.fs
    test_feats = learn_feats_all[min_t_seg_idx:max_t_seg_idx, :]

    # find the nearest neighbors
    from sklearn.neighbors import NearestNeighbors
    # Keyword form: recent scikit-learn releases reject positional arguments
    neigh = NearestNeighbors(n_neighbors=1)
    # fit on the learning data
    neigh.fit(learn_feats)
    neighb_segments_idx = neigh.kneighbors(test_feats, return_distance=False)

    # kneighs is a set of segment indices, we need to get the spectrogram
    # back from the learning data then fit the new segment lengths
    target_length = int(test_duration * testing.fs)
    # Materialized as a list so it can be indexed / iterated more than once
    neighb_segments = list(zip(neighb_segments_idx[:, 0],
                               t_segment_lengths.astype(int)))
    morphed_spectro = spec_morph(np.abs(learn_specs), target_length,
                                 neighb_segments, l_seg_bounds)

    # retrieve true stft for comparison
    test_specs = features.get_stft(testing.data, wsize, tstep)

    plt.figure()
    plt.subplot(121)
    plt.imshow(np.log(np.abs(test_specs)), origin='lower')
    plt.colorbar()
    plt.subplot(122)
    plt.imshow(np.log(morphed_spectro.T), origin='lower')
    plt.colorbar()
    plt.show()

    # Reconstruct a waveform from the morphed magnitudes, starting from noise
    init_vec = np.random.randn(morphed_spectro.shape[0] * tstep)
    rec_method2 = transforms.gl_recons(morphed_spectro.T, init_vec, 10,
                                       wsize, tstep, display=False)
    rec_sig_2 = Signal(rec_method2, testing.fs, mono=True, normalize=True)
    rec_sig_2.write('/sons/tests/rec_sig2.wav')
def gen_harmo_sig(freqs, L, fs):
    """Return a normalized mono Signal made of a sum of sinusoids.

    Parameters
    ----------
    freqs : iterable of numbers
        Frequencies of the sinusoids to add; each has unit amplitude and
        zero phase.
    L : number
        Number of samples to generate.
    fs : number
        Sampling frequency, used both for the time axis and as the sampling
        rate of the returned Signal.

    Returns
    -------
    Signal
        ``Signal(data, fs, normalize=True, mono=True)`` where ``data`` is
        the sum of ``sin(2*pi*f*t)`` over ``freqs``.
    """
    # Build the time axis from an integer sample index: a float-step
    # np.arange(0, L/fs, 1/fs) can come out one sample too long or too short
    # because of rounding, whereas this form always yields exactly L samples
    # for an integer L.
    t = np.arange(L) / float(fs)
    data = np.zeros(t.shape)
    for f in freqs:
        data += np.sin(2.0 * np.pi * f * t)
    return Signal(data, fs, normalize=True, mono=True)
from PyMP import Signal from scipy.signal import lfilter, hann #audio_test_file = '/home/manu/workspace/recup_angelique/Sketches/NLS Toolbox/Hand-made Toolbox/forAngelique/61_sadness.wav' audio_test_file = op.abspath('./audio/original_surprise.wav') audio_name = 'surprise' from classes.sketches.bench import * from classes.sketches.misc import * from classes.sketches.cochleo import * from classes.sketches.cortico import * from classes.pydb import * fgpthandle = STFTPeaksBDB(None, **{'wall': False}) sk = STFTPeaksSketch(**{'scale': 2048, 'step': 512}) orig_sig = Signal(audio_test_file, normalize=True, mono=True) noisy_sig = Signal(orig_sig.data + 0.2 * np.random.randn(orig_sig.length), orig_sig.fs, normalize=True, mono=True) sk.recompute(orig_sig) sk.sparsify(20) plt.figure(figsize=(10, 6)) plt.subplot(221) orig_sig.spectrogram(512, 128, order=2, log=True, ax=plt.gca(), cmap=cm.bone_r,
plt.figure() plt.subplot(211) plt.imshow(np.abs(sk.cort.cor[-1,0,:,:])) plt.subplot(212) plt.plot(np.sum(np.abs(sk.cort.cor[-1,0,:,:]), axis=0)) #plt.show() plt.figure() plt.subplot(211) plt.imshow(np.abs(sk.cort.cor[0,-1,:,:].T)) plt.subplot(212) plt.plot(np.sum(np.abs(sk.cort.cor[0,-1,:,:]), axis=1)) plt.show() sig = Signal(os.path.abspath('../reporting/audio/original_surprise.wav'), mono=True, normalize=True) #sig.crop(0, 2*sig.fs) sk = CorticoSubPeaksSketch(**{'n_inv_iter':5}) sk.recompute(sig) #sk.sparsify(100) #sk.represent() #plt.show() combis = [(0,6),(4,6),(0,11),(4,11)] for combi in combis: sk.sp_rep = np.zeros_like(sk.rep) sk.sp_rep[combi[0], combi[1], :,:] = sk.rep[combi[0], combi[1], :,:] aud_path = os.path.abspath('../reporting/figures/') f = plt.figure(figsize=(10,6))
# Make the meeg_denoise checkout importable before pulling cochleo_tools
# from it.
sys.path.append('/home/manu/workspace/meeg_denoise')
from src.tools import cochleo_tools
#from classes import sketch
import matplotlib.pyplot as plt
from PyMP import Signal
from scipy.signal import lfilter, hann
from scipy.io import loadmat
#from scipy.fftpack import fft, ifft
from numpy.fft import fft, ifft

# Switch matplotlib to the Agg backend
plt.switch_backend('Agg')

# The second assignment overrides the first: only the jingle is processed.
audio_test_file = '/home/manu/workspace/recup_angelique/Sketches/NLS Toolbox/nsltools/_done.au'
audio_test_file = '/sons/jingles/panzani.wav'

############################### Inversion
sig = Signal(audio_test_file, mono=True, normalize=True)
sig.downsample(8000)

# convert to auditory
params = dict(frmlen=8, shift=0, fac=-2, BP=1)
gram = cochleo_tools.Cochleogram(sig.data, **params)

# Profile the current and the old build routine one after the other
import cProfile
for stmt in ('gram.build_aud()', 'gram.build_aud_old()'):
    cProfile.runctx(stmt, globals(), locals())

aud = gram.build_aud()

# Cortico-gram : 2D complex transform of y5
# we need to define y = gram.y5, para1= vector of parameters,
# rv = rate vector, sv = scale vector
y = np.array(gram.y5)
"""
Walk-through of basic Signal handling: load a wav file, print it, crop it,
take a sub-signal by slicing, then pad and de-pad a small constant signal.
"""
import numpy as np
from PyMP.mdct import Dico, atom
from PyMP import Signal, approx

sig = Signal('../data/glocs.wav', debug_level=3)

print(sig)
print(sig.data)

# sig.plot()
# sig.write('newDestFile.wav')

# editing
# The explicit ' ' reproduces the separator that the two-item print
# statement inserted between the label and the value.
print('Before cropping Length of ' + ' ' + str(sig.length))
sig.crop(0, 2048)
print('After cropping Length of ' + ' ' + str(sig.length))

sub_sig = sig[0:2048]
print(sub_sig)

# A constant signal of 8 ones with a sampling rate of 1
new_sig = Signal(np.ones((8,)), 1)
# The bare `new_sig.data` expressions only display a value when the tutorial
# is pasted into an interactive session.
new_sig.data

print("Padding")
new_sig.pad(4)
new_sig.data

print("De-Padding")
new_sig.depad(4)
new_sig.data