def runTest(self):
    """Plot a classic and a spreading-dictionary decomposition side by side.

    Loads ``Bach_prelude_40s.wav`` (mono, normalized), crops it to
    ``5 * fs`` and pads it by 16384, then runs ``mp.mp`` twice with at most
    200 atoms (third argument 20 -- presumably the target SRR in dB): once
    with a plain ``Dico`` and once with a ``SpreadDico``.  The two
    time-frequency plots are drawn in one figure and the residual-energy
    decay curves (in dB relative to the signal energy) in a second one.

    Nothing is asserted and the figures are neither shown nor saved here.
    """
    # Only referenced by the disabled ``savefig`` call of the original.
    name = "orchestra"

    sig = Signal(op.join(audio_filepath, "Bach_prelude_40s.wav"),
                 mono=True, normalize=True)
    sig.crop(0, 5 * sig.fs)
    sig.pad(16384)
    energy = np.sum(sig.data ** 2)

    scales = [128, 1024, 8192]
    n_atoms = 200
    plain_dico = mdct_dico.Dico(scales)
    spread_dico = mdct_dico.SpreadDico(scales, all_scales=True,
                                       penalty=0.1, maskSize=10)

    plain_approx, plain_decay = mp.mp(sig, plain_dico, 20, n_atoms)
    spread_approx, spread_decay = mp.mp(sig, spread_dico, 20, n_atoms,
                                        pad=False)

    import matplotlib.pyplot as plt

    def decay_db(decay):
        # Residual energy of every iteration, in dB w.r.t. the signal energy.
        return [10 * np.log10(value / energy) for value in decay]

    # Figure 1: both time-frequency representations.
    plt.figure(figsize=(16, 8))
    panels = (
        (121, plain_approx, "Classic decomposition : 200 atoms 3xMDCT"),
        (122, spread_approx,
         "Decomposition with TF masking: 200 atoms 3xMDCT"),
    )
    for position, approx, title in panels:
        plt.subplot(position)
        approx.plot_tf(ylim=[0, 4000])
        plt.title(title)

    # Figure 2: energy decay of both runs.
    plt.figure()
    plt.plot(decay_db(plain_decay))
    plt.plot(decay_db(spread_decay), "r")
    plt.legend(("Classic decomposition", "Spreading Atoms"))
    plt.ylabel("Residual energy decay(dB)")
    plt.xlabel("Iteration")
def runTest(self):
    """Compare classic and spreading decompositions of ``glocs.wav``.

    Same layout as a plain visual check: the signal is cropped to
    ``5 * fs``, padded by 16384 and decomposed twice with ``mp.mp`` (at most
    200 atoms), once with ``Dico(..., useC=False)`` and once with a
    ``SpreadDico`` restricted to the scales 1024 and 8192.  Three figures
    are built: the two time-frequency plots, the energy decay curves, and,
    for ``spreadDico.blocks[1]`` and ``[2]``, the block mask reshaped as an
    image plus whatever ``block.im_proj_matrix()`` draws.

    Nothing is asserted.
    """
    # Only referenced by the disabled ``savefig`` calls of the original.
    name = "orchestra"

    sig = Signal(op.join(audio_filepath, "glocs.wav"),
                 mono=True, normalize=True)
    sig.crop(0, 5 * sig.fs)
    sig.pad(16384)
    energy = np.sum(sig.data ** 2)

    scales = [128, 1024, 8192]
    n_atoms = 200
    plain_dico = mdct_dico.Dico(scales, useC=False)
    spread_dico = mdct_dico.SpreadDico(scales,
                                       all_scales=False,
                                       spread_scales=[1024, 8192],
                                       penalty=0.1,
                                       mask_time=2,
                                       mask_freq=2)

    plain_approx, plain_decay = mp.mp(sig, plain_dico, 20, n_atoms)
    spread_approx, spread_decay = mp.mp(sig, spread_dico, 20, n_atoms,
                                        pad=False)

    def decay_db(decay):
        # Residual energy of every iteration, in dB w.r.t. the signal energy.
        return [10 * np.log10(value / energy) for value in decay]

    # Figure 1: both time-frequency representations.
    plt.figure(figsize=(16, 8))
    panels = (
        (121, plain_approx, "Classic decomposition : 200 atoms 3xMDCT"),
        (122, spread_approx,
         "Decomposition with TF masking: 200 atoms 3xMDCT"),
    )
    for position, approx, title in panels:
        plt.subplot(position)
        approx.plot_tf(ylim=[0, 4000])
        plt.title(title)

    # Figure 2: energy decay of both runs.
    plt.figure()
    plt.plot(decay_db(plain_decay))
    plt.plot(decay_db(spread_decay), "r")
    plt.legend(("Classic decomposition", "Spreading Atoms"))
    plt.ylabel("Residual energy decay(dB)")
    plt.xlabel("Iteration")

    # Figure 3: masks (top row) and projection matrices (bottom row).
    plt.figure()
    for idx in (1, 2):
        block = spread_dico.blocks[idx]
        plt.subplot(2, 2, idx)
        # Assumes block.scale is an int so that `/` floors (Python 2).
        n_cols = block.scale / 2
        n_rows = block.mask.shape[0] / n_cols
        print("%s %s %s" % (block.mask.shape, n_rows, n_cols))
        plt.imshow(np.reshape(block.mask, (n_rows, n_cols)),
                   interpolation="nearest",
                   aspect="auto")
        plt.colorbar()
        plt.subplot(2, 2, idx + 2)
        block.im_proj_matrix()
        plt.colorbar()
def recompute(self, signal=None, **kwargs):
    """Update the parameters and recompute the MP representation.

    :param signal: optional new signal.  A ``str`` is handed to ``Signal``
        (``normalize=True, mono=True``); any other non-``None`` value is
        stored as is.  With ``None`` the previously stored
        ``self.orig_signal`` is reused.
    :param kwargs: entries copied into ``self.params`` before anything else
        is done (so they are kept even when the call then fails).
    :raises ValueError: when no signal is passed and none was stored.

    On success ``self.params['fs']`` holds the signal's sampling rate and
    ``self.rep`` the first element of the ``mp.mp`` result.
    """
    for key, value in kwargs.items():
        self.params[key] = value

    if signal is not None:
        if isinstance(signal, str):
            # TODO allow for stereo signals
            signal = Signal(signal, normalize=True, mono=True)
        self.orig_signal = signal

    if self.orig_signal is None:
        raise ValueError("No original Sound has been given")

    # `in` replaces the deprecated dict.has_key (gone in Python 3).
    if 'fs' in self.params:
        self.orig_signal.resample(self.params['fs'])

    # Record the sampling rate actually carried by the signal.
    self.params['fs'] = self.orig_signal.fs

    dico = self._get_dico()

    # Imported here, as in the original, rather than at module level.
    from PyMP import mp
    self.rep = mp.mp(self.orig_signal,
                     dico,
                     self.params['SRR'],
                     self.params['n_atoms'],
                     silent_fail=True,
                     pad=self.params['pad'],
                     debug=self.params['debug'])[0]
def runTest(self):
    """Populate a fresh ``XMDCTBDB`` from one decomposition and query it.

    Decomposes a crop of ``file_names[2]`` with ``mp.mp`` (20 atoms at
    most), stores the result in ``MPdb.db`` and checks that

    * the ``'ndata'`` statistic equals ``approx.atom_number``;
    * querying with a key built from the first atom returns the file index
      and a time within 0.1 of that atom's time position;
    * querying the key ``(11, 120.0)`` returns two empty lists.
    """
    print("------------------ Test3 Populate from MP coeffs ---------")
    file_idx = 2
    wav_name = file_names[file_idx]
    print('Working on %s' % wav_name)

    sizes = [2**j for j in range(7, 15)]
    largest = sizes[-1]
    seg_seconds = 5
    n_atoms = 20

    sig = signals.Signal(op.join(audio_files_path, wav_name),
                         mono=True, normalize=True)

    # Segment length rounded to a multiple of the largest scale; this relies
    # on `/` flooring for ints (Python 2) -- assumes sig.fs is an int.
    seg_len = ((seg_seconds * sig.fs) / largest) * largest
    n_seg = floor(sig.length / seg_len)  # computed as before, unused below

    sig.crop(0, seg_len)
    approx, decay = mp.mp(sig, Dico(sizes), 20, n_atoms, pad=True, debug=0)

    db = XMDCTBDB('MPdb.db', load=False)
    db.populate(approx, None, file_idx)

    # The base must hold as many entries as there are atoms.
    n_keys = db.get_stats()['ndata']
    print(db.get_stats())
    self.assertEqual(n_keys, approx.atom_number)

    # Recover the file index from the first atom alone.
    first = approx.atoms[0]
    key = [log(first.length, 2), first.reduced_frequency * sig.fs]
    times, files = db.get(key)
    true_time = float(first.time_position) / float(sig.fs)
    print("%s %s" % (times, true_time))
    self.assertEqual(files[0], file_idx)
    self.assertTrue(np.abs(np.array(times) - true_time).min() < 0.1)

    # A key that is not in the base yields empty answers.
    times, files = db.get((11, 120.0))
    self.assertEqual(times, [])
    self.assertEqual(files, [])

    # Retrieval is exercised but its result is not checked.
    histograms = db.retrieve(approx, None, offset=0)

    del db
def runTest(self):
    """Time ``mp.mp`` with a plain and a spreading dictionary, then plot.

    Both runs decompose ``Bach_prelude_40s.wav`` with at most 100 atoms over
    the scales 128/1024/8192; the ``SpreadDico`` uses ``penalty=0``.  The
    wall-clock duration of each run is printed and the two time-frequency
    plots are shown side by side.  Nothing is asserted.
    """
    scales = [128, 1024, 8192]
    n_atoms = 100

    sig = Signal(op.join(audio_filepath, "Bach_prelude_40s.wav"),
                 mono=True, normalize=True)
    plain_dico = mdct_dico.Dico(scales)
    spread_dico = mdct_dico.SpreadDico(scales, all_scales=True,
                                       penalty=0, maskSize=10)

    import time

    start = time.time()
    plain_app, _ = mp.mp(sig, plain_dico, 20, n_atoms)
    print("Classic took %1.3f sec" % (time.time() - start))

    start = time.time()
    spread_app, _ = mp.mp(sig, spread_dico, 20, n_atoms)
    print("Spread took %1.3f sec" % (time.time() - start))

    plt.figure()
    for position, app in ((121, plain_app), (122, spread_app)):
        plt.subplot(position)
        app.plot_tf()
    plt.show()
def recompute(self, signal=None, **kwargs):
    """Update the parameters, preprocess the signal and recompute ``rep``.

    :param signal: optional new signal.  A ``str`` is handed to ``Signal``
        (``normalize=True, mono=True``); any other non-``None`` value is
        stored as is.  With ``None`` the previously stored
        ``self.orig_signal`` is reused.
    :param kwargs: entries copied into ``self.params`` before anything else
        is done (so they are kept even when the call then fails).
    :raises ValueError: when no signal is passed and none was stored.

    The optional parameters ``'fs'``, ``'crop'`` and ``'pad'`` trigger, in
    that order, ``downsample``, ``crop(0, ...)`` and ``pad`` on the stored
    signal (which is therefore modified in place).  On success
    ``self.params['fs']`` holds the signal's sampling rate and ``self.rep``
    the first element of the ``mp.mp`` result.
    """
    for key, value in kwargs.items():
        self.params[key] = value

    if signal is not None:
        if isinstance(signal, str):
            # TODO allow for stereo signals
            signal = Signal(signal, normalize=True, mono=True)
        self.orig_signal = signal

    if self.orig_signal is None:
        raise ValueError("No original Sound has been given")

    # `in` replaces the deprecated dict.has_key (gone in Python 3).
    if 'fs' in self.params:
        self.orig_signal.downsample(self.params['fs'])
    if 'crop' in self.params:
        self.orig_signal.crop(0, self.params['crop'])
    if 'pad' in self.params:
        self.orig_signal.pad(self.params['pad'])

    # Record the sampling rate actually carried by the signal.
    self.params['fs'] = self.orig_signal.fs

    dico = self._get_dico()

    # Imported here, as in the original, rather than at module level.
    from PyMP import mp
    # pad=False: any padding was already applied to the signal above.
    self.rep = mp.mp(self.orig_signal,
                     dico,
                     self.params['SRR'],
                     self.params['n_atoms'],
                     silent_fail=True,
                     pad=False,
                     debug=self.params['debug'],
                     max_thread_num=3)[0]
def runTest(self):
    '''Recognition test against the previously built ``LargeMPdb.db``.

    A segment of ``file_names[6]`` starting ``7 * fs`` samples into the file
    is decomposed with ``mp.mp`` (50 atoms at most) and looked up in the
    base in two ways: atom by atom through ``ppdb.get`` (votes accumulated
    in a local 600 x 8 histogram, printed only) and through
    ``ppdb.retrieve``.  The assertions use the ``retrieve`` histogram: its
    arg-max must designate file 6 and an offset within 2.5 of 7.
    '''
    print("------------------ Test6 recognition ---------")
    nbCandidates = 8
    ppdb = XMDCTBDB('LargeMPdb.db', load=True)
    print('Large Db of ' + str(ppdb.get_stats()['nkeys']) + ' and ' + str(
        ppdb.get_stats()['ndata']))

    # Take a song, decompose a segment of it and try to retrieve it.
    file_idx = 6
    wav_name = file_names[file_idx]
    print('Working on ' + str(wav_name))
    sig = signals.Signal(op.join(audio_files_path, wav_name), mono=True)
    lo_dico = LODico(sizes)

    seg_seconds = 5
    offset_seconds = 7
    start = offset_seconds * sig.fs
    n_atoms = 50
    # Relies on `/` flooring for ints (Python 2) -- assumes sig.fs is an int.
    seg_len = ((seg_seconds * sig.fs) / sizes[-1]) * sizes[-1]
    sig.crop(start, start + seg_len)

    approx, decay = mp.mp(sig, lo_dico, 40, n_atoms, pad=True)

    # One base lookup per atom: formatted key plus pad-corrected time.
    keys = map(ppdb.kform, approx.atoms)
    times = [(atom.time_position - lo_dico.get_pad()) / approx.fs
             for atom in approx.atoms]
    res = map(ppdb.get, keys, times)

    votes = np.zeros((600, nbCandidates))
    for i in range(approx.atom_number):
        print(res[i])
        votes[res[i]] += 1

    # The flat arg-max is split into (offset, file) by the number of columns.
    best_vote = np.argmax(votes[:])
    vote_offset = best_vote / nbCandidates
    vote_file = best_vote % nbCandidates

    histograms = ppdb.retrieve(approx, None, offset=0, nbCandidates=8)
    best = np.argmax(histograms[:])
    found_offset = best / nbCandidates
    found_file = best % nbCandidates

    summary = (file_idx, offset_seconds, found_file, found_offset,
               vote_file, vote_offset, best_vote, best)
    print(" ".join(["%s"] * len(summary)) % summary)

    # Kept from the original; every plotting call that used it is disabled.
    import matplotlib.pyplot as plt

    print("%s %s %s" % (best, found_offset, found_file))
    self.assertEqual(histograms[found_offset, found_file],
                     np.max(histograms))
    self.assertEqual(file_idx, found_file)
    self.assertTrue(abs(offset_seconds - found_offset) <= 2.5)
def runTest(self):
    """Index one file by segments, then locate overlapping test segments.

    First pass: up to 10 segments of ``file_names[0]`` (``Noverlap=0``) are
    decomposed with a ``SpreadDico([8192])`` (150 atoms at most) and stored
    in ``tempdb.db`` under file index 0 with their time offset.

    Second pass: up to 15 segments of the same file read with
    ``Noverlap=0.5`` are decomposed the same way and looked up with
    ``retrieve``.  A segment counts as found when the retrieved offset is
    within 5 of the expected one; the test requires a score above 0.8.
    """
    db = XMDCTBDB('tempdb.db', load=False, persistent=True, time_max=500.0)

    train_sig = signals.LongSignal(op.join(audio_files_path, file_names[0]),
                                   frame_duration=5,
                                   mono=False,
                                   Noverlap=0)
    self.assertEqual(train_sig.segment_size, 5.0 * train_sig.fs)

    seg_limit = 10
    nb_atoms = 150
    spread_dico = SpreadDico([8192], penalty=0.1, mask_time=2, mask_freq=20)

    # --- build the base -------------------------------------------------
    for seg in range(min(seg_limit, train_sig.n_seg)):
        local_sig = train_sig.get_sub_signal(seg, 1,
                                             mono=True,
                                             normalize=False,
                                             channel=0,
                                             pad=spread_dico.get_pad())
        print("MP on segment %d" % seg)
        approx, decay = mp.mp(local_sig, spread_dico, 2, nb_atoms, pad=False)

        print("Populating database with offset " + str(
            seg * train_sig.segment_size / train_sig.fs))
        # The sub-signal was padded, so the pad is removed from its start.
        start_sample = (seg * train_sig.segment_size) - spread_dico.get_pad()
        db.populate(approx, None, 0,
                    offset=float(start_sample) / float(train_sig.fs))

    # --- query it with half-overlapping segments of the same file --------
    test_limit = 15
    test_sig = signals.LongSignal(op.join(audio_files_path, file_names[0]),
                                  frame_duration=5,
                                  mono=False,
                                  Noverlap=0.5)
    hits = 0
    for seg in range(min(test_limit, test_sig.n_seg)):
        local_sig = test_sig.get_sub_signal(seg, 1,
                                            mono=True,
                                            normalize=False,
                                            channel=0,
                                            pad=spread_dico.get_pad())
        approx, decay = mp.mp(local_sig, spread_dico, 2, nb_atoms, pad=False)
        print(approx.atom_number)

        histograms = db.retrieve(approx, None, nbCandidates=1)
        best = np.argmax(histograms[:])
        # With a single candidate the flat index is the offset itself.
        found_offset = best / 1
        found_file = best % 1  # kept from the original, not used

        expected = seg * test_sig.segment_size * (
            1 - test_sig.overlap) / test_sig.fs
        print("Seg %d Oracle: %1.1f - found %1.1f" % (seg, expected,
                                                      found_offset))
        if abs(found_offset - expected) < 5:
            hits += 1

    score = float(hits) / float(min(test_limit, test_sig.n_seg))
    print("Global Score of %1.3f" % score)
    self.assertGreater(score, 0.8)
def runTest(self):
    '''Build the fingerprint base ``LargeMPdb.db`` from the first 8 files.

    Every ``file_names`` entry (among the first 8) whose last three
    characters are ``wav`` is cut into segments; up to 5 segments per file
    are decomposed with ``mp.mp`` over a ``LODico`` (50 atoms at most) and
    stored with the file index and the segment's time offset.  The base
    statistics are printed at the end; nothing is asserted.
    '''
    print("------------------ Test5 DB construction ---------")

    # Create the (persistent) base.
    db = XMDCTBDB('LargeMPdb.db', load=False, time_res=0.2)
    print(db)

    pad_len = 2 * sizes[-1]

    # BUGFIX: with classic MP some atoms get selected again, which is a
    # problem because the key/data pair then already exists in the base.
    lo_dico = LODico(sizes)

    seg_seconds = 5
    n_atoms = 50

    # Only the sampling rate of this first signal is used below.
    sig = signals.LongSignal(op.join(audio_files_path, file_names[0]),
                             frame_size=sizes[-1],
                             mono=False,
                             Noverlap=0)
    # Relies on `/` flooring for ints (Python 2) -- assumes sig.fs is an int.
    seg_len = ((seg_seconds * sig.fs) / sizes[-1]) * sizes[-1]
    seg_limit = 5

    n_files = 8
    atom_total = 0  # accumulated as in the original, never read back
    for file_idx in range(n_files):
        wav_name = file_names[file_idx]
        print("%s %s" % (file_idx, wav_name))
        if wav_name[-3:] != 'wav':
            continue

        long_sig = signals.LongSignal(op.join(audio_files_path, wav_name),
                                      frame_size=seg_len,
                                      mono=False,
                                      Noverlap=0)
        n_seg = int(long_sig.n_seg)
        print('Working on ' + str(wav_name) + ' with ' + str(
            n_seg) + ' segments')

        for seg in range(min(n_seg, seg_limit)):
            local_sig = long_sig.get_sub_signal(seg, 1, True, True,
                                                channel=0,
                                                pad=pad_len)
            print("MP on segment %d" % seg)
            approx, decay = mp.mp(local_sig, lo_dico, 40, n_atoms, pad=False)

            print("Populating database with offset " + str(
                seg * seg_len / sig.fs))
            # The sub-signal was padded, so the pad is removed from its start.
            start_sample = (seg * seg_len) - pad_len
            db.populate(approx, None, file_idx,
                        offset=float(start_sample) / sig.fs)
            atom_total += approx.atom_number

    print(db.get_stats())
# Example script: decompose a 4 s excerpt of ClocheB.wav with MP over a
# three-scale MDCT dictionary, encode the approximation with
# mp_coder.simple_mdct_encoding and display the quantized result.
import matplotlib.pyplot as plt
import os
from PyMP import Signal, mp, mp_coder
from PyMP.mdct import Dico

# The data directory is resolved relative to the current working directory.
abPath = os.path.abspath("../../data/")
sig = Signal(abPath + "/ClocheB.wav", mono=True)  # Load Signal
sig.crop(0, 4.0 * sig.fs)  # Keep only 4 seconds

# atom of scales 8, 64 and 512 ms
# (milliseconds converted to samples; assumes sig.fs is an int so that `/`
# floors under Python 2 -- TODO confirm)
scales = [(s * sig.fs / 1000) for s in (8, 64, 512)]

# Dictionary for Standard MP
pyDico = Dico(scales)

# Launching decomposition, stops either at 20 dB of SRR or 2000 iterations
mpApprox, mpDecay = mp.mp(sig, pyDico, 20, 2000)

# mpApprox.atomNumber
# Encode the approximation (second argument 2000, Q=14); the call returns an
# SNR, a bitrate and the quantized approximation.
SNR, bitrate, quantizedApprox = mp_coder.simple_mdct_encoding(mpApprox, 2000, Q=14)

# Time-frequency view of the quantized approximation.
quantizedApprox.plot_tf()
plt.show()
# Script fragment: decompose the beginning of glocs.wav and start a figure
# with the original waveform (a) and the recomposed approximation (c).
# NOTE(review): `sizes`, `n_atoms` and the imports (os, np, plt, Signal,
# Dico, mp) are defined above this fragment -- not visible here.
abPath = os.path.abspath('../../data/')
sig = Signal(abPath + '/glocs.wav', mono=True, normalize=True)

# taking only the first musical phrase (3.5 seconds approximately)
sig.crop(0, 3.5 * sig.fs)
sig.pad(8192)

# add some minor noise to avoid null areas
# (unseeded random noise, so the decomposition is not reproducible run to run)
sig.data += 0.0001 * np.random.randn(sig.length)

# create MDCT multiscale dictionary
dico = Dico(sizes)

# run the MP routine
approx, decay = mp.mp(sig, dico, 50, n_atoms)

# plotting the results
# Time axis in seconds, one value per sample.
timeVec = np.arange(0, float(sig.length)) / sig.fs
plt.figure(figsize=(10, 6))

# Upper-left axes: original signal.
axOrig = plt.axes([0.05, 0.55, .4, .4])
axOrig.plot(timeVec, sig.data)
axOrig.set_title('(a)')
axOrig.set_xticks([1, 2, 3, 4])
axOrig.set_ylim([-1.0, 1.0])

# Lower-left axes: signal recomposed from the approximation.
axApprox = plt.axes([0.05, 0.07, .4, .4])
axApprox.plot(timeVec, approx.recomposed_signal.data)
axApprox.set_title('(c)')
axApprox.set_xlabel('Temps (s)')
# Script fragment: decompose the two halves of a Bach excerpt with the same
# dictionary and print the Euclidean and Hamming distances between the two
# resulting sparse vectors.
# NOTE(review): `os` and `op` are used below but imported above this
# fragment -- not visible here.
import numpy as np
# NOTE(review): this overwrites any PYMP_PATH already set in the environment
# with a developer-specific absolute path -- confirm this is intended.
os.environ['PYMP_PATH'] = '/home/manu/workspace/PyMP/'
from PyMP.mdct import Dico
from PyMP import mp, Signal
from PyMP.tools.Misc import euclid_dist, hamming_dist

# Decomposing and visualizing the sparse dec
signal = Signal(op.join(os.environ['PYMP_PATH'],'data/Bach_prelude_4s.wav'), mono=True)

# Split the signal in two halves (the index relies on `/` flooring for ints
# under Python 2 -- assumes signal.length is an int).
sig_occ1 = signal[:signal.length/2]
sig_occ2 = signal[signal.length/2:]

dico = Dico([128,1024,8192])
target_srr = 5
max_atom_num = 200

# Same dictionary and stopping parameters for both halves.
app_1, _ = mp.mp(sig_occ1, dico, target_srr, max_atom_num)
app_2, _ = mp.mp(sig_occ2, dico, target_srr, max_atom_num)

#plt.figure(figsize=(16,6))
#plt.subplot(121)
#app_1.plot_tf()
#plt.subplot(122)
#app_2.plot_tf()
#plt.show()

# First element of to_array() for each approximation.
sp_vec_1 = app_1.to_array()[0]
sp_vec_2 = app_2.to_array()[0]

print "%1.5f, %1.5f"%(euclid_dist(sp_vec_1,sp_vec_2), hamming_dist(sp_vec_1,sp_vec_2))
# Script fragment: sanity-check the parallelProjections plan
# initialisation/cleanup, profile two MP runs, then start the C-binding
# tests.  `mdctDico`, `tol` and the imported modules are defined above this
# fragment (not visible here); every name is kept unchanged because the
# profiled statements below refer to them by name.
print("test the initialization function")
if parallelProjections.initialize_plans(np.array(mdctDico), np.array(tol)) != 1:
    print("Initiliazing Stage Failed")
if parallelProjections.clean_plans() != 1:
    # Fixed: this branch used to print the initialisation-failure message.
    print("Cleaning Stage Failed")

pySigOriginal = signals.InitFromFile("../../data/ClocheB.wav", True, True)
pyDico2 = dico.Dico(mdctDico)
pyDico_Lomp = dico.LODico(mdctDico)
residualSignal = pySigOriginal.copy()

# One plain run first, outside the profiler.
app, decay = mp.mp(pySigOriginal, pyDico2, 20, 200, 0)

print(" profiling test with C integration")
# The statements are evaluated in this module's namespace.
cProfile.runctx("mp.mp(pySigOriginal, pyDico2, 20, 200 ,0)", globals(), locals())
cProfile.runctx("mp.mp(pySigOriginal, pyDico_Lomp, 20, 200 ,0)", globals(), locals())

################" C binding tests ########
N = 64
L = 16
if parallelProjections.initialize_plans(np.array([L]), np.array([2])) != 1:
    print("Initiliazing Stage Failed")

# Explicit floor division: same value (8) as before under Python 2, and it
# stays an int under true division.
P = N // (L // 2)
# Script fragment: decompose a glockenspiel excerpt with a standard MDCT
# dictionary and with an LODico, then show both time-frequency plots.
# NOTE(review): the imports (mpl, os, plt, Signal, Dico, LODico, mp) sit
# above this fragment -- not visible here.

# Global matplotlib settings for the figures below.
mpl.rcParams['legend.fancybox'] = True
mpl.rcParams['legend.shadow'] = True
mpl.rcParams['image.interpolation'] = 'Nearest'
#mpl.rcParams['text.usetex'] = True

# Load glockenspiel signal
abPath = os.path.abspath('../../data/')
sig = Signal(abPath + '/glocs.wav', mono=True, normalize=True)
sig.crop(0, 3 * sig.fs)

# Shared settings for both decompositions.
scales = [128, 1024, 8192]
n_atoms = 500
srr = 30

mp_dico = Dico(scales)
lomp_dico = LODico(scales)

# NOTE(review): the first call uses pad=True and the second pad=False on the
# same `sig`; presumably the first call pads `sig` in place -- confirm.
mp_approx, mp_decay = mp.mp(sig, mp_dico, srr, n_atoms, pad=True)
lomp_approx, lomp_decay = mp.mp(sig, lomp_dico, srr, n_atoms, pad=False)

# Standard result on top, LODico result below.
plt.figure()
plt.subplot(211)
mp_approx.plot_tf()
plt.subplot(212)
lomp_approx.plot_tf()
# print mp_approx , lomp_approx
plt.show()
M. Moussallam
"""
from PyMP.mdct import Dico, LODico
from PyMP.mdct.rand import SequenceDico
from PyMP import mp, mp_coder, Signal

# Decompose a 4 s excerpt of ClocheB.wav once, then encode the same
# approximation several times with different coder settings.
signal = Signal('../data/ClocheB.wav', mono=True)  # Load Signal
signal.crop(0, 4.0 * signal.fs)  # Keep only 4 seconds

# atom of scales 8, 64 and 512 ms
# (milliseconds converted to samples; assumes signal.fs is an int so that
# `/` floors under Python 2 -- TODO confirm)
scales = [(s * signal.fs / 1000) for s in (8, 64, 512)]

# Pad by the largest scale here, which is why mp.mp is called with pad=False.
signal.pad(scales[-1])

# Dictionary for Standard MP
dico = Dico(scales)

# Launching decomposition, stops either at 20 dB of SRR or 2000 iterations
app, dec = mp.mp(signal, dico, 20, 2000, pad=False)

# NOTE(review): bare expression -- the value is read and discarded.
app.atom_number

# Encoding with second argument 8000 and Q=14.
snr, bitrate, quantized_app = mp_coder.simple_mdct_encoding(
    app, 8000, Q=14)
print (snr, bitrate)

print "With Q=5"
# Same second argument, coarser Q.
snr, bitrate, quantized_app = mp_coder.simple_mdct_encoding(
    app, 8000, Q=5)
print (snr, bitrate)

# Back to Q=14 with second argument 2000.
snr, bitrate, quantized_app = mp_coder.simple_mdct_encoding(
    app, 2000, Q=14)