示例#1
0
def merge_codebook(codebook, nGoal, freqs=None):
    """
    Shrink a codebook by greedily merging its closest codes.
    Algo:
      - computes the distance between every pair of codes
        (VQU.euclidean_dist)
      - finds the closest pair
      - replaces the first code of the pair by the plain mean of the two
        codes and sums their frequencies
      - removes the second code (the last code is moved into its slot)
      - repeats until the desired number of codes (nGoal) is reached
    INPUT:
      codebook - 2D numpy array, one code per row; it is not modified,
                 the function works on a deep copy
      nGoal    - number of codes wanted, 0 < nGoal < codebook.shape[0]
      freqs    - one frequency per code; None or an empty sequence
                 (the historical default was []) means all ones
    Returns smaller codebook, #codes=nGoal
    Also returns frequencies of the new codebook
    Raises AssertionError when the size constraints above do not hold.
    Prints one progress line per merge.
    Code not optimized!!!!!! close to n^3 operations
    """
    import numpy as np
    import VQutils as VQU
    import copy
    # set freqs, sanity checks
    # (comparing an ndarray with [] is an elementwise comparison, not an
    # emptiness test, so the size is checked explicitly)
    if freqs is None or np.size(freqs) == 0:
        freqs = np.ones(codebook.shape[0])
    freqs = np.array(freqs)  # private copy, the caller's data is untouched
    assert (freqs.size == codebook.shape[0])
    assert (nGoal < codebook.shape[0])
    assert (nGoal > 0)
    # let's go!
    cb = copy.deepcopy(codebook)
    for k in range(codebook.shape[0] - nGoal):
        # compute dists for all pairs; only the strict upper triangle holds
        # real distances, everything else stays at inf so it is never picked
        nCodes = cb.shape[0]
        dists = np.empty([nCodes, nCodes])
        dists.fill(np.inf)
        for l in range(nCodes):
            for c in range(l + 1, nCodes):
                dists[l, c] = VQU.euclidean_dist(cb[l], cb[c])
        # find closest pair (first one in row-major order in case of ties)
        min_dist = dists.min()
        pos = np.where(dists == min_dist)
        code1 = pos[0][0]
        code2 = pos[1][0]
        # single-argument print: same output under Python 2 and Python 3
        print('iter %s  min distance= %s  codes= %s , %s' %
              (k, min_dist, code1, code2))
        # code1 should be smaller from how we filled dists
        assert (code1 < code2)
        # merge
        # NOTE(review): this is a plain mean, freqs are only accumulated and
        # do not weight the merge. A frequency-weighted merge would be
        # (cb[code1] * freqs[code1] + cb[code2] * freqs[code2])
        #   / (freqs[code1] + freqs[code2])
        # -- confirm which one is wanted before changing results.
        cb[code1, :] = np.mean([cb[code1, :], cb[code2, :]], axis=0)
        freqs[code1] += freqs[code2]
        # remove code2: overwrite it with the last code, then drop the
        # last row (nothing to move when code2 already is the last row)
        if code2 + 1 < cb.shape[0]:
            cb[code2, :] = cb[-1, :]
            freqs[code2] = freqs[-1]
        cb = cb[:-1]
        freqs = freqs[:-1]
    # done
    return cb, freqs
示例#2
0
def merge_codebook(codebook, nGoal, freqs=None):
    """
    merge the codebook in an iterative and greedy way.
    Algo:
      - finds closest pair of codes (distance: VQU.euclidean_dist)
      - merge them: the first code of the pair becomes the plain mean of
        both codes, its frequency becomes the sum of both frequencies
      - drop the second code of the pair (the last code takes its slot)
      - repeat until desired number of codes (nGoal)
    Arguments:
      codebook  2D numpy array, one code per row (left untouched, a deep
                copy is merged)
      nGoal     wanted number of codes, 0 < nGoal < codebook.shape[0]
      freqs     frequency of each code; None or empty ([] was the
                historical default) means every code has frequency 1
    Returns smaller codebook, #codes=nGoal
    Also returns frequencies of the new codebook
    Raises AssertionError if the size constraints are violated.
    Prints one line per merge.
    Code not optimized!!!!!! close to n^3 operations
    """
    import numpy as np
    import VQutils as VQU
    import copy
    # set freqs, sanity checks
    # `freqs == []` is an elementwise comparison when freqs is an ndarray,
    # hence the explicit size test
    if freqs is None or np.size(freqs) == 0:
        freqs = np.ones(codebook.shape[0])
    freqs = np.array(freqs)  # copy: the caller's freqs are never modified
    assert(freqs.size == codebook.shape[0])
    assert(nGoal < codebook.shape[0])
    assert(nGoal > 0)
    # let's go!
    cb = copy.deepcopy(codebook)
    nMerges = codebook.shape[0] - nGoal
    for k in range(nMerges):
        # compute dists for all pairs, upper triangle only; the diagonal
        # and the lower triangle are inf so that they never win the min
        n = cb.shape[0]
        dists = np.empty([n, n])
        dists.fill(np.inf)
        for l in range(n):
            for c in range(l + 1, n):
                dists[l, c] = VQU.euclidean_dist(cb[l], cb[c])
        # find closest pair, ties resolved in row-major order
        closest = dists.min()
        rows, cols = np.where(dists == closest)
        code1 = rows[0]
        code2 = cols[0]
        # one formatted string: prints the same with Python 2 and Python 3
        print('iter %s  min distance= %s  codes= %s , %s'
              % (k, closest, code1, code2))
        assert(code1 < code2)  # code1 should be smaller from how we filled dists
        # merge
        # NOTE(review): plain mean, the frequencies do not weight the merge
        # (they are only summed). A weighted version would be
        # (cb[code1]*freqs[code1] + cb[code2]*freqs[code2])
        #   / (freqs[code1] + freqs[code2]) -- confirm the intent first.
        cb[code1, :] = np.mean([cb[code1, :], cb[code2, :]], axis=0)
        freqs[code1] += freqs[code2]
        # remove: move the last code into the freed slot (unless code2 is
        # the last one already), then cut the last row
        if code2 + 1 < cb.shape[0]:
            cb[code2, :] = cb[-1, :]
            freqs[code2] = freqs[-1]
        cb = cb[:-1]
        freqs = freqs[:-1]
    # done
    return cb, freqs
示例#3
0
def LLE_my_codebook(codebook, nNeighbors=5, nRand=5):
    """
    Performs LLE on the codebook (reduction to 2D)
    Display the result

    Every code is plotted as a dot at its 2D LLE position. Some codes are
    also drawn as images through add_image, in this order: nRand codes
    picked at random, then the leftmost, rightmost, topmost and bottommost
    points, and finally the point closest to the origin.

    LLE code not mine, see the LLE module for reference.

    codebook   - 2D array, one code per row; each row is reshaped to
                 12 x (row length // 12), so the row length must be a
                 multiple of 12
    nNeighbors - passed as second argument to LLE.LLE
    nRand      - number of random images added
    Returns nothing, ends by calling P.show().
    """
    import pylab as P
    import LLE
    import numpy as np
    import VQutils as VQU
    # compute LLE, goal is 2D
    # (assumes LLE.LLE returns one column per code, x in row 0 and y in
    # row 1 -- this is how the result is indexed below)
    LLEres = LLE.LLE(codebook.T, nNeighbors, 2)
    xs = LLEres[0, :]
    ys = LLEres[1, :]
    # plot that result
    P.plot(xs, ys, '.')
    # pyplot.hold() no longer exists in recent matplotlib; only call it
    # where it is still available
    can_hold = hasattr(P, 'hold')
    if can_hold:
        P.hold(True)
    # prepare to plot; floor division keeps patch_size an integer under
    # Python 3 as well (reshape rejects a float)
    nRows = 12
    patch_size = codebook[0, :].size // nRows

    def draw_code(idx, *extra):
        # draw code number idx as an image at its LLE position
        add_image(P, codebook[idx, :].reshape(nRows, patch_size),
                  xs[idx], ys[idx], *extra)

    # add random
    for _ in range(nRand):
        draw_code(np.random.randint(LLEres.shape[1]), .08)
    # plot extreme left codebook
    draw_code(np.argmin(xs))
    # plot extreme right codebook
    draw_code(np.argmax(xs))
    # plot extreme up codebook
    draw_code(np.argmax(ys))
    # plot extreme down codebook
    draw_code(np.argmin(ys))
    # plot middle codebook (closest to the origin)
    origin = np.zeros(2)
    draw_code(np.argmin([VQU.euclidean_dist(r, origin) for r in LLEres.T]))
    # done, release, show
    if can_hold:
        P.hold(False)
    P.show()
示例#4
0
def LLE_my_codebook(codebook,nNeighbors=5,nRand=5):
    """
    Performs LLE on the codebook, down to 2 dimensions
    Display the result

    All codes are plotted as dots at their LLE coordinates. On top of
    that, add_image is called for nRand random codes, then for the codes
    with the smallest x, largest x, largest y and smallest y, and last
    for the code closest to the origin.

    LLE code not mine, see the LLE module for reference.

    codebook   = 2D array, one code per row, row length a multiple of 12
                 (rows are reshaped to 12 x (row length // 12))
    nNeighbors = second argument given to LLE.LLE
    nRand      = number of random images added
    No return value; P.show() is called at the end.
    """
    import pylab as P
    import LLE
    import numpy as np
    import VQutils as VQU
    # compute LLE, goal is 2D
    # (the result is indexed below as [coordinate, code] -- assumed to be
    # the layout returned by LLE.LLE)
    LLEres = LLE.LLE(codebook.T,nNeighbors,2)
    xs = LLEres[0,:]
    ys = LLEres[1,:]
    # plot that result
    P.plot(xs,ys,'.')
    # P.hold is gone from recent matplotlib, call it only if it exists
    hold = getattr(P,'hold',None)
    if hold is not None:
        hold(True)
    # prepare to plot; '//' so that patch_size stays an int with
    # Python 3 too, reshape does not accept a float
    nRows = 12
    patch_size = codebook[0,:].size // nRows

    def show_code(idx,*extra):
        # image of code idx, placed at its LLE coordinates
        patch = codebook[idx,:].reshape(nRows,patch_size)
        add_image(P,patch,xs[idx],ys[idx],*extra)

    # add random
    for _ in range(nRand):
        show_code(np.random.randint(LLEres.shape[1]),.08)
    # extreme left, extreme right, extreme up, extreme down (in that order)
    for pick,coords in ((np.argmin,xs),(np.argmax,xs),
                        (np.argmax,ys),(np.argmin,ys)):
        show_code(pick(coords))
    # plot middle codebook, i.e. the one closest to (0,0)
    center = np.zeros(2)
    show_code(np.argmin([VQU.euclidean_dist(r,center) for r in LLEres.T]))
    # done, release, show
    if hold is not None:
        hold(False)
    P.show()
示例#5
0
def knn_from_freqs_on_artists(filenames,
                              codebook,
                              pSize=8,
                              keyInv=True,
                              downBeatInv=False,
                              bars=2,
                              normalize=True,
                              confMatrix=True,
                              use_l0_dist=False,
                              use_artists=False):
    """
    Performs a leave-one-out experiment where we try to guess the artist
    of a song from its nearest neighbor in code frequencies.
    Distances come from VQU.euclidean_dist (described here originally as
    squared euclidean distance -- not checkable from this function), or
    from l0_dist if use_l0_dist=True.

    filenames are expected to be: */artist/album/*.mat
    codebook, pSize, keyInv, downBeatInv, bars, normalize are passed to
    freqs_my_songs to get the code frequencies of every file.
    if confMatrix=True, draw the confusion matrix with pylab (P.show()
    is not called here).
    if use_artists, songs are matched to artists, not to other songs: each
    artist is represented by the mean frequencies of its songs, the
    tested song being left out of its own artist's mean.

    Songs with empty frequencies are ignored (never tested, never used as
    reference). A song with no usable reference for its own artist counts
    as a failed experiment and is not entered in the confusion matrix.

    RETURNS:
    - confusion matrix (rows: real artist, columns: predicted artist,
      artists in sorted order)
    - freqs per file
    - artist per file
    """
    import numpy as np
    import os
    import VQutils as VQU
    import time

    nCodes = codebook.shape[0]
    # get frequencies for all songs
    tstart = time.time()
    freqs = freqs_my_songs(filenames,
                           codebook,
                           pSize=pSize,
                           keyInv=keyInv,
                           downBeatInv=downBeatInv,
                           bars=bars,
                           normalize=normalize)
    # single-argument print calls: same output with Python 2 and Python 3
    print('all frequencies computed in %s seconds.' % (time.time() - tstart))
    # get artists for all songs: name of the directory two levels up
    artists = []
    for f in filenames:
        album_dir = os.path.dirname(f)
        artist_dir = os.path.dirname(album_dir)
        artists.append(os.path.basename(artist_dir))
    artists = np.array(artists)
    # names of artists (np.unique returns them sorted)
    artist_names = np.unique(artists)
    nArtists = artist_names.shape[0]
    # sanity check
    nFiles = len(filenames)
    assert (nFiles == len(artists))
    # songs without frequencies cannot be compared to anything
    has_freqs = [len(freqs[k]) != 0 for k in range(nFiles)]
    # distance function; l0_dist is only looked up when it is requested
    dist_fun = l0_dist if use_l0_dist else VQU.euclidean_dist
    tstart = time.time()
    if not use_artists:
        # compute distance between all songs
        dists = np.zeros([nFiles, nFiles])
        for l in range(nFiles):
            dists[l, l] = np.inf  # a song is never its own neighbor
            for c in range(l + 1, nFiles):
                if has_freqs[l] and has_freqs[c]:
                    d = dist_fun(freqs[l], freqs[c])
                else:
                    d = np.inf
                # the matrix has to be symmetric whatever the distance
                # (the l0 branch used to leave the lower triangle at 0)
                dists[l, c] = d
                dists[c, l] = d
    else:
        # create a matrix songs * nArtists
        dists = np.zeros([nFiles, nArtists])
        # precompute cntArtists and artistFreqs, not normalized
        cntArtists = {}
        artistFreqs = {}
        for name in artist_names:
            cntArtists[name] = 0
            artistFreqs[name] = np.zeros([1, nCodes])
        for k in range(nFiles):
            if not has_freqs[k]:
                continue  # nothing to add for that song
            cntArtists[artists[k]] += 1
            artistFreqs[artists[k]] += freqs[k]
        # iterate over files
        for l in range(nFiles):
            if not has_freqs[l]:
                continue  # that row is never read, the song is skipped below
            # fill in the line in dists
            for c in range(nArtists):
                art = artist_names[c]
                total = artistFreqs[art]
                count = cntArtists[art]
                if art == artists[l]:
                    # leave-one-out: take the song out of its own artist
                    total = total - freqs[l]
                    count -= 1
                if count == 0:
                    # no song left to describe that artist (this used to
                    # be a division by zero): it cannot be matched
                    dists[l, c] = np.inf
                    continue
                dists[l, c] = dist_fun(freqs[l], total * (1. / count))
    print('distances computed in %s seconds.' % (time.time() - tstart))
    # confusion matrix
    confMat = np.zeros([nArtists, nArtists])
    # performs leave-one-out KNN
    nExps = 0
    nGood = 0
    randScore = 0  # sums prob of having it right by luck, must divide by nExps
    for songid in range(nFiles):
        if not has_freqs[songid]:
            continue
        # get close matches ordered, remove inf (the original line read
        # row 1 instead of this song's row and dropped the result)
        row = dists[songid, :]
        orderedMatches = np.argsort(row)
        orderedMatches = orderedMatches[row[orderedMatches] != np.inf]
        # artist
        artist = artists[songid]
        nMatches = orderedMatches.shape[0]
        # artist name behind every candidate, closest first
        if use_artists:
            candidates = artist_names[orderedMatches]
        else:
            candidates = artists[orderedMatches]
        # get stats
        nExps += 1
        nGoodMatches = np.where(candidates == artist)[0].shape[0]
        if nGoodMatches == 0:
            continue  # the right answer is not reachable: counted as a miss
        # with use_artists and no inf distance this is 1. / nArtists
        randScore += nGoodMatches * 1. / nMatches
        pred_artist = candidates[0]
        if pred_artist == artist:
            nGood += 1
        # fill confusion matrix
        real_artist_id = np.where(artist_names == artist)[0][0]
        pred_artist_id = np.where(artist_names == pred_artist)[0][0]
        print('%s ) real artist: %s id= %s , pred artist: %s id= %s' %
              (songid, artist, real_artist_id, pred_artist, pred_artist_id))
        confMat[real_artist_id, pred_artist_id] += 1
    # done, print out (nan instead of a division by zero if nothing ran)
    if nExps > 0:
        rand_accuracy = randScore * 1. / nExps
        accuracy = nGood * 1. / nExps
    else:
        rand_accuracy = accuracy = float('nan')
    print('nExps: %s' % nExps)
    print('rand accuracy: %s' % rand_accuracy)
    print('accuracy: %s' % accuracy)
    # plot confusion matrix
    if confMatrix:
        # x ticks only show the first two letters of every artist name
        short_names = np.array([x[:2] for x in artist_names])
        import pylab as P
        P.imshow(confMat,
                 interpolation='nearest',
                 cmap=P.cm.gray_r,
                 origin='lower')
        P.yticks(np.arange(nArtists), list(artist_names))
        P.xticks(np.arange(nArtists), list(short_names))
        P.title('confusion matrix (real/predicted)')
        P.ylabel('TRUE')
        P.xlabel('RECOG')
        P.colorbar()
    # return confusion matrix
    return confMat, freqs, artists
示例#6
0
def knn_from_freqs_on_artists(filenames,codebook,pSize=8,keyInv=True,
                              downBeatInv=False,bars=2,normalize=True,
                              confMatrix=True,use_l0_dist=False,use_artists=False):
    """
    Performs a leave-one-out experiment where we try to guess the artist
    of each song from its nearest neighbor in code frequencies.
    The distance is VQU.euclidean_dist (originally documented here as
    squared euclidean distance, which this function cannot confirm), or
    l0_dist when use_l0_dist=True.

    filenames are expected to be: */artist/album/*.mat
    codebook,pSize,keyInv,downBeatInv,bars,normalize go to freqs_my_songs,
    which provides the code frequencies of each file.
    if confMatrix=True, the confusion matrix is drawn with pylab
    (without calling P.show()).
    if use_artists, a song is matched to artists instead of other songs;
    an artist is the mean of the frequencies of its songs, computed
    without the song being tested.

    Songs whose frequencies are empty are left out entirely. When nothing
    is left to represent the artist of a song, the song counts as a
    failed experiment and adds nothing to the confusion matrix.

    RETURNS:
    - confusion matrix (row = real artist, column = predicted artist,
      in sorted artist order)
    - freqs per file
    - artist per file
    """
    import numpy as np
    import os
    import VQutils as VQU
    import time

    nCodes = codebook.shape[0]
    # get frequencies for all songs
    tstart = time.time()
    freqs = freqs_my_songs(filenames,codebook,pSize=pSize,keyInv=keyInv,
                           downBeatInv=downBeatInv,bars=bars,
                           normalize=normalize)
    # print is always given one string: identical output in Python 2 and 3
    print('all frequencies computed in %s seconds.' % (time.time()-tstart))
    # get artists for all songs, i.e. the directory above the album one
    artists = []
    for f in filenames:
        album_path = os.path.dirname(f)
        artist_path = os.path.dirname(album_path)
        artists.append(os.path.basename(artist_path))
    artists = np.array(artists)
    # names of artists, sorted by np.unique
    artist_names = np.unique(artists)
    nArtists = artist_names.shape[0]
    # sanity check
    nFiles = len(filenames)
    assert(nFiles == len(artists))
    # which songs have usable frequencies
    usable = [len(freqs[k]) != 0 for k in range(nFiles)]
    # pick the distance; the name l0_dist is resolved only if needed
    measure = l0_dist if use_l0_dist else VQU.euclidean_dist
    tstart = time.time()
    if use_artists:
        # create a matrix songs * nArtists
        dists = np.zeros([nFiles,nArtists])
        # precompute cntArtists and artistFreqs, not normalized
        cntArtists = dict((name,0) for name in artist_names)
        artistFreqs = dict((name,np.zeros([1,nCodes])) for name in artist_names)
        for k in range(nFiles):
            if usable[k]:  # empty frequencies cannot be summed
                cntArtists[artists[k]] += 1
                artistFreqs[artists[k]] += freqs[k]
        # iterate over files
        for l in range(nFiles):
            if not usable[l]:
                continue  # this song is skipped below, its row is unused
            # fill in the line in dists
            for c in range(nArtists):
                art = artist_names[c]
                own = (art == artists[l])
                # leave-one-out: the song does not describe its own artist
                count = cntArtists[art] - 1 if own else cntArtists[art]
                if count == 0:
                    # artist without any other song: unreachable
                    # (previously a division by zero)
                    dists[l,c] = np.inf
                    continue
                total = artistFreqs[art] - freqs[l] if own else artistFreqs[art]
                dists[l,c] = measure(freqs[l],total * (1. / count))
    else:
        # compute distance between all songs
        dists = np.zeros([nFiles,nFiles])
        for l in range(nFiles):
            dists[l,l] = np.inf  # fill diag with inf
            for c in range(l+1,nFiles):
                d = measure(freqs[l],freqs[c]) if usable[l] and usable[c] else np.inf
                # both halves are filled for every distance (with l0_dist
                # the lower half used to stay at 0)
                dists[l,c] = d
                dists[c,l] = d
    print('distances computed in %s seconds.' % (time.time()-tstart))
    # confusion matrix
    confMat = np.zeros([nArtists,nArtists])
    # what a column of dists stands for: an artist or a song (its artist)
    labels = artist_names if use_artists else artists
    # performs leave-one-out KNN
    nExps = 0
    nGood = 0
    randScore = 0 # sums prob of having it right by luck, must divide by nExps
    for songid in range(nFiles):
        if not usable[songid]:
            continue
        # get close matches ordered, remove inf; the original statement
        # looked at row 1 and threw its result away
        songDists = dists[songid,:]
        orderedMatches = np.argsort(songDists)
        orderedMatches = orderedMatches[songDists[orderedMatches] != np.inf]
        nMatches = orderedMatches.shape[0]
        # artist
        artist = artists[songid]
        candidates = labels[orderedMatches]  # closest first
        # get stats
        nExps += 1
        nGoodMatches = np.where(candidates==artist)[0].shape[0]
        if nGoodMatches == 0:
            continue  # no way to be right: a miss, nothing more to record
        # equals 1. / nArtists with use_artists when no distance is inf
        randScore += nGoodMatches * 1. / nMatches
        pred_artist = candidates[0]
        if pred_artist == artist:
            nGood += 1
        # fill confusion matrix
        real_artist_id = np.where(artist_names==artist)[0][0]
        pred_artist_id = np.where(artist_names==pred_artist)[0][0]
        print('%s ) real artist: %s id= %s , pred artist: %s id= %s'
              % (songid,artist,real_artist_id,pred_artist,pred_artist_id))
        confMat[real_artist_id,pred_artist_id] += 1
    # done, print out; nan rather than a division by zero when no song
    # could be tested
    print('nExps: %s' % nExps)
    if nExps > 0:
        print('rand accuracy: %s' % (randScore*1./nExps))
        print('accuracy: %s' % (nGood*1./nExps))
    else:
        print('rand accuracy: %s' % float('nan'))
        print('accuracy: %s' % float('nan'))
    # plot confusion matrix
    if confMatrix:
        # first two letters of each name, used for the x ticks
        short_names = np.array([x[:2] for x in artist_names])
        import pylab as P
        P.imshow(confMat,interpolation='nearest',cmap=P.cm.gray_r,
                 origin='lower')
        P.yticks(np.arange(nArtists),list(artist_names))
        P.xticks(np.arange(nArtists),list(short_names))
        P.title('confusion matrix (real/predicted)')
        P.ylabel('TRUE')
        P.xlabel('RECOG')
        P.colorbar()
    # return confusion matrix
    return confMat,freqs,artists