def merge_codebook(codebook, nGoal, freqs=None):
    """
    Merges the codebook in an iterative and greedy way.
    Algo:
      - find the closest pair of codes
      - merge them, using freqs if available
      - repeat until the desired number of codes (nGoal) is reached
    Returns the smaller codebook (#codes = nGoal).
    Also returns the frequencies of the new codebook.
    Code not optimized! Close to n^3 operations.
    """
    import numpy as np
    import VQutils as VQU
    import copy
    # set freqs, sanity checks
    if freqs is None or len(freqs) == 0:
        freqs = np.ones(codebook.shape[0])
    freqs = np.array(freqs)
    assert freqs.size == codebook.shape[0]
    assert 0 < nGoal < codebook.shape[0]
    # let's go!
    cb = copy.deepcopy(codebook)
    for k in range(codebook.shape[0] - nGoal):
        # compute dists for all pairs (upper triangle; rest set to inf)
        dists = np.zeros([cb.shape[0], cb.shape[0]])
        for l in range(dists.shape[0]):
            dists[l, l] = np.inf
            for c in range(l + 1, dists.shape[1]):
                dists[l, c] = VQU.euclidean_dist(cb[l], cb[c])
                dists[c, l] = np.inf
        # find the closest pair
        pos = np.where(dists == dists.min())
        code1 = pos[0][0]
        code2 = pos[1][0]
        print 'iter', k, ' min distance=', dists.min(), ' codes=', code1, ',', code2
        assert code1 < code2  # code1 is smaller from how we filled dists
        # merge: unweighted mean; the frequency-weighted alternative would be
        # cb[code1,:] = (cb[code1,:]*freqs[code1] + cb[code2,:]*freqs[code2]) / (freqs[code1] + freqs[code2])
        cb[code1, :] = np.mean([cb[code1, :], cb[code2, :]], axis=0)
        freqs[code1] += freqs[code2]
        # remove code2 by swapping in the last row, then truncating
        if code2 + 1 < cb.shape[0]:
            cb[code2, :] = cb[-1, :]
            freqs[code2] = freqs[-1]
        cb = cb[:-1]
        freqs = freqs[:-1]
    # done
    return cb, freqs
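
# A minimal usage sketch for merge_codebook (this demo helper is not part
# of the original module). The random codebook below is made up for
# illustration; a real codebook would come from VQ training elsewhere.
# Assumes VQutils is importable, as merge_codebook requires.
def _demo_merge_codebook():
    import numpy as np
    codebook = np.random.rand(50, 24)  # 50 fake codes of dimension 24
    cb2, freqs2 = merge_codebook(codebook, 10)
    assert cb2.shape == (10, 24)
    assert freqs2.sum() == 50  # total frequency mass is conserved by merging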
def LLE_my_codebook(codebook, nNeighbors=5, nRand=5):
    """
    Performs LLE (locally linear embedding) on the codebook and
    displays the result in 2D.
    The LLE code is not mine, see that code for its reference.
    nRand = number of random code images added to the plot.
    """
    import pylab as P
    import LLE
    import numpy as np
    import VQutils as VQU
    # compute LLE, goal is 2D
    LLEres = LLE.LLE(codebook.T, nNeighbors, 2)
    # plot that result
    P.plot(LLEres[0, :], LLEres[1, :], '.')
    P.hold(True)
    # prepare to plot patches (codes are flattened 12 x patch_size images)
    patch_size = codebook[0, :].size / 12
    # add random codes
    for k in range(nRand):
        idx = np.random.randint(LLEres.shape[1])
        add_image(P, codebook[idx, :].reshape(12, patch_size),
                  LLEres[0, idx], LLEres[1, idx], .08)
    # plot extreme left code
    idx = np.argmin(LLEres[0, :])
    add_image(P, codebook[idx, :].reshape(12, patch_size),
              LLEres[0, idx], LLEres[1, idx])
    # plot extreme right code
    idx = np.argmax(LLEres[0, :])
    add_image(P, codebook[idx, :].reshape(12, patch_size),
              LLEres[0, idx], LLEres[1, idx])
    # plot extreme up code
    idx = np.argmax(LLEres[1, :])
    add_image(P, codebook[idx, :].reshape(12, patch_size),
              LLEres[0, idx], LLEres[1, idx])
    # plot extreme down code
    idx = np.argmin(LLEres[1, :])
    add_image(P, codebook[idx, :].reshape(12, patch_size),
              LLEres[0, idx], LLEres[1, idx])
    # plot the code closest to the middle (origin) of the embedding
    idx = np.argmin([VQU.euclidean_dist(r, np.zeros(2)) for r in LLEres.T])
    add_image(P, codebook[idx, :].reshape(12, patch_size),
              LLEres[0, idx], LLEres[1, idx])
    # done, release, show
    P.hold(False)
    P.show()
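
# A minimal usage sketch for LLE_my_codebook (demo helper, not part of the
# original module). Assumes the LLE module and the add_image helper used
# above are available; the random codebook is made up for illustration and
# must be flattened 12 x patch_size images, as LLE_my_codebook expects.
def _demo_LLE_my_codebook():
    import numpy as np
    codebook = np.random.rand(40, 12 * 8)  # 40 fake codes, 12x8 patches
    LLE_my_codebook(codebook, nNeighbors=5, nRand=3)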
def knn_from_freqs_on_artists(filenames, codebook, pSize=8, keyInv=True,
                              downBeatInv=False, bars=2, normalize=True,
                              confMatrix=True, use_l0_dist=False,
                              use_artists=False):
    """
    Performs a leave-one-out experiment where we try to guess the artist
    of a song from its nearest neighbors in code frequencies.
    We use squared euclidean distance.
    filenames are expected to be: */artist/album/*.mat
    if confMatrix=True, plot it.
    if use_artists, songs are matched to artists, not to other songs.
    RETURNS:
      - confusion matrix
      - freqs per file
      - artist per file
    """
    import numpy as np
    import os
    import VQutils as VQU
    import time
    import copy
    nCodes = codebook.shape[0]
    # get frequencies for all songs
    tstart = time.time()
    freqs = freqs_my_songs(filenames, codebook, pSize=pSize, keyInv=keyInv,
                           downBeatInv=downBeatInv, bars=bars,
                           normalize=normalize)
    print 'all frequencies computed in', (time.time() - tstart), 'seconds.'
    # get artists for all songs, from the */artist/album/*.mat layout
    artists = []
    for f in filenames:
        tmp, song = os.path.split(f)
        tmp, album = os.path.split(tmp)
        tmp, artist = os.path.split(tmp)
        artists.append(artist)
    artists = np.array(artists)
    # names of artists
    artist_names = np.unique(np.sort(artists))
    nArtists = artist_names.shape[0]
    # sanity check
    assert len(filenames) == len(artists)
    # compute distance between all songs
    nFiles = len(filenames)
    tstart = time.time()
    if not use_artists:
        dists = np.zeros([nFiles, nFiles])
        for l in range(nFiles):
            for c in range(l + 1, nFiles):
                if len(freqs[l]) == 0 or len(freqs[c]) == 0:
                    dists[l, c] = np.inf
                    dists[c, l] = np.inf
                    continue
                if use_l0_dist:
                    dists[l, c] = l0_dist(freqs[l], freqs[c])
                else:
                    dists[l, c] = VQU.euclidean_dist(freqs[l], freqs[c])
                dists[c, l] = dists[l, c]
        for l in range(nFiles):  # fill diag with inf
            dists[l, l] = np.inf
    else:
        # create a matrix songs * nArtists
        dists = np.zeros([nFiles, nArtists])
        # precompute cntArtists and artistFreqs, not normalized
        cntArtists = {}
        artistFreqs = {}
        for k in artist_names:
            cntArtists[k] = 0
            artistFreqs[k] = np.zeros([1, nCodes])
        for k in range(artists.shape[0]):
            if len(freqs[k]) == 0:  # skip files whose features failed
                continue
            art = artists[k]
            cntArtists[art] += 1
            artistFreqs[art] += freqs[k]
        # iterate over files, leaving the current song out of its artist model
        for l in range(nFiles):
            if len(freqs[l]) == 0:
                dists[l, :] = np.inf
                continue
            currArtist = artists[l]
            currCntArtists = copy.deepcopy(cntArtists)
            currCntArtists[currArtist] -= 1
            currArtistFreqs = copy.deepcopy(artistFreqs)
            currArtistFreqs[currArtist] -= freqs[l]
            for k in currArtistFreqs.keys():  # normalize
                if currCntArtists[k] > 0:
                    currArtistFreqs[k] *= 1. / currCntArtists[k]
            # fill in the line in dists
            for c in range(nArtists):
                art = artist_names[c]
                if use_l0_dist:
                    dists[l, c] = l0_dist(freqs[l], currArtistFreqs[art])
                else:
                    dists[l, c] = VQU.euclidean_dist(freqs[l],
                                                     currArtistFreqs[art])
    print 'distances computed in', (time.time() - tstart), 'seconds.'
    # confusion matrix
    confMat = np.zeros([nArtists, nArtists])
    # perform leave-one-out KNN
    nExps = 0
    nGood = 0
    randScore = 0  # sums prob of being right by luck, divide by nExps
    for songid in range(nFiles):
        if len(freqs[songid]) == 0:
            continue
        # get close matches ordered, remove inf
        orderedMatches = np.argsort(dists[songid, :])
        orderedMatches = orderedMatches[np.where(
            dists[songid, orderedMatches] != np.inf)]
        # artist
        artist = artists[songid]
        nMatches = orderedMatches.shape[0]
        if use_artists:
            assert nMatches == nArtists
        # get stats
        nExps += 1
        if not use_artists:
            nGoodMatches = np.where(
                artists[orderedMatches] == artist)[0].shape[0]
            if nGoodMatches == 0:
                continue
            randScore += nGoodMatches * 1. / nMatches
            pred_artist = artists[orderedMatches[0]]
        else:
            randScore += 1. / nArtists
            pred_artist = artist_names[orderedMatches[0]]
        if pred_artist == artist:
            nGood += 1
        # fill confusion matrix
        real_artist_id = np.where(artist_names == artist)[0][0]
        pred_artist_id = np.where(artist_names == pred_artist)[0][0]
        print songid, ') real artist:', artist, 'id=', real_artist_id, ', pred artist:', pred_artist, 'id=', pred_artist_id
        confMat[real_artist_id, pred_artist_id] += 1
    # done, print out
    print 'nExps:', nExps
    print 'rand accuracy:', (randScore * 1. / nExps)
    print 'accuracy:', (nGood * 1. / nExps)
    # plot confusion matrix
    if confMatrix:
        short_names = np.array([x[:2] for x in artist_names])
        import pylab as P
        P.imshow(confMat, interpolation='nearest', cmap=P.cm.gray_r,
                 origin='lower')
        P.yticks(P.arange(artist_names.shape[0]), list(artist_names))
        P.xticks(P.arange(artist_names.shape[0]), list(short_names))
        P.title('confusion matrix (real/predicted)')
        P.ylabel('TRUE')
        P.xlabel('RECOG')
        P.colorbar()
    # return confusion matrix
    return confMat, freqs, artists
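
# A minimal usage sketch for knn_from_freqs_on_artists (demo helper, not
# part of the original module). The glob pattern and the saved codebook
# file below are hypothetical; they only illustrate the expected
# */artist/album/*.mat layout and that a trained codebook must be supplied.
def _demo_knn_from_freqs_on_artists():
    import glob
    import numpy as np
    filenames = glob.glob('data/*/*/*.mat')  # */artist/album/*.mat
    codebook = np.load('codebook.npy')       # hypothetical saved codebook
    confMat, freqs, artists = knn_from_freqs_on_artists(
        filenames, codebook, pSize=8, bars=2, use_artists=True)
    return confMat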