Пример #1
0
def freqs_my_songs(filenames,
                   codebook,
                   pSize=8,
                   keyInv=True,
                   downBeatInv=False,
                   bars=2,
                   normalize=False):
    """
    Build one code-usage histogram per song.

    Every file in filenames is encoded with encode_one_song (pSize, keyInv,
    downBeatInv and bars are forwarded to it) and the number of times each
    codebook entry gets selected is counted.

    Returns a list, in the order of filenames, of numpy arrays of shape
    (1, codebook.shape[0]). If normalize is true, the histogram of every
    non-empty song is divided by VQU.euclidean_norm of itself.
    """
    import numpy as np
    import VQutils as VQU
    n_codes = codebook.shape[0]
    histograms = []
    for song_file in filenames:
        # only the per-pattern code indices are needed here
        codes, _feats, _enc, _feats_mat, _enc_mat = encode_one_song(
            song_file,
            codebook,
            pSize=pSize,
            keyInv=keyInv,
            downBeatInv=downBeatInv,
            bars=bars)
        hist = np.zeros([1, n_codes])
        for idx in codes:
            hist[0, int(idx)] += 1
        # a song without any code keeps its all-zero histogram untouched
        if normalize and len(codes) > 0:
            hist *= 1. / VQU.euclidean_norm(hist)
        histograms.append(hist)
    return histograms
Пример #2
0
def merge_codebook(codebook, nGoal, freqs=None):
    """
    Shrink a codebook by greedily merging its closest pairs of codes.

    Algo, repeated until only nGoal codes remain:
      - find the pair of codes with the smallest VQU.euclidean_dist
      - replace the first code of the pair by the plain (unweighted) mean
        of the two, and give it the sum of both frequencies
      - remove the second code (the last code is moved into its slot)

    codebook  2-D numpy array, one code per row; it is not modified
    nGoal     desired number of codes, 0 < nGoal < codebook.shape[0]
    freqs     optional frequencies, one per code (list or array). None or
              an empty sequence means every code starts with frequency 1.
              Frequencies are accumulated but do NOT weight the mean.

    Returns smaller codebook (#codes=nGoal) and the frequencies of its codes.
    Raises AssertionError when the arguments are inconsistent.
    Code not optimized!!!!!! all pairwise distances are recomputed at every
    iteration, close to n^3 operations
    """
    import numpy as np
    import VQutils as VQU
    import copy
    # set freqs, sanity checks
    # `freqs == []` must not be used here: with a numpy array it is an
    # elementwise comparison that warns or raises depending on the numpy
    # version instead of telling whether freqs was provided
    if freqs is None or np.size(freqs) == 0:
        freqs = np.ones(codebook.shape[0])
    freqs = np.array(freqs)  # private copy, the caller's data is untouched
    assert (freqs.size == codebook.shape[0])
    assert (nGoal < codebook.shape[0])
    assert (nGoal > 0)
    # let's go!
    cb = copy.deepcopy(codebook)
    for k in range(codebook.shape[0] - nGoal):
        # compute dists for all pairs; only the strict upper triangle is
        # filled, everything else stays at inf so that it is never selected
        nCurrent = cb.shape[0]
        dists = np.empty([nCurrent, nCurrent])
        dists.fill(np.inf)
        for l in range(nCurrent):
            for c in range(l + 1, nCurrent):
                dists[l, c] = VQU.euclidean_dist(cb[l], cb[c])
        # find closest pair (first one in row-major order if there are ties)
        minDist = dists.min()
        pos = np.where(dists == minDist)
        code1 = pos[0][0]
        code2 = pos[1][0]
        # single-argument print: same output with the print statement of
        # Python 2 and the print function of Python 3
        print('iter %s  min distance= %s  codes= %s , %s' %
              (k, minDist, code1, code2))
        # code1 should be smaller from how we filled dists
        assert (code1 < code2)
        # merge
        cb[code1, :] = np.mean([cb[code1, :], cb[code2, :]], axis=0)
        freqs[code1] += freqs[code2]
        # remove code2: overwrite it with the last code, then drop the last
        if code2 + 1 < cb.shape[0]:
            cb[code2, :] = cb[-1, :]
            freqs[code2] = freqs[-1]
        cb = cb[:-1]
        freqs = freqs[:-1]
    # done
    return cb, freqs
Пример #3
0
def merge_codebook(codebook,nGoal,freqs = None):
    """
    Shrink a codebook by greedily merging its closest pairs of codes.

    Algo, repeated until only nGoal codes remain:
      - find the pair of codes with the smallest VQU.euclidean_dist
      - replace the first code of the pair by the plain (unweighted) mean
        of the two, and give it the sum of both frequencies
      - remove the second code (the last code is moved into its slot)

    codebook  2-D numpy array, one code per row; it is not modified
    nGoal     desired number of codes, 0 < nGoal < codebook.shape[0]
    freqs     optional frequencies, one per code (list or array). None or
              an empty sequence means every code starts with frequency 1.
              Frequencies are accumulated but do NOT weight the mean.

    Returns smaller codebook (#codes=nGoal) and the frequencies of its codes.
    Raises AssertionError when the arguments are inconsistent.
    Code not optimized!!!!!! all pairwise distances are recomputed at every
    iteration, close to n^3 operations
    """
    import numpy as np
    import VQutils as VQU
    import copy
    # set freqs, sanity checks
    # `freqs == []` must not be used here: with a numpy array it is an
    # elementwise comparison that warns or raises depending on the numpy
    # version instead of telling whether freqs was provided
    if freqs is None or np.size(freqs) == 0:
        freqs = np.ones(codebook.shape[0])
    freqs = np.array(freqs) # private copy, the caller's data is untouched
    assert(freqs.size == codebook.shape[0])
    assert(nGoal < codebook.shape[0])
    assert(nGoal > 0)
    # let's go!
    cb = copy.deepcopy(codebook)
    for k in range(codebook.shape[0] - nGoal):
        # compute dists for all pairs; only the strict upper triangle is
        # filled, everything else stays at inf so that it is never selected
        nCurrent = cb.shape[0]
        dists = np.empty([nCurrent,nCurrent])
        dists.fill(np.inf)
        for l in range(nCurrent):
            for c in range(l+1,nCurrent):
                dists[l,c] = VQU.euclidean_dist(cb[l],cb[c])
        # find closest pair (first one in row-major order if there are ties)
        minDist = dists.min()
        pos = np.where(dists==minDist)
        code1 = pos[0][0]
        code2 = pos[1][0]
        # single-argument print: same output with the print statement of
        # Python 2 and the print function of Python 3
        print('iter %s  min distance= %s  codes= %s , %s' %
              (k,minDist,code1,code2))
        # code1 should be smaller from how we filled dists
        assert(code1 < code2)
        # merge
        cb[code1,:] = np.mean([cb[code1,:],cb[code2,:]],axis=0)
        freqs[code1] += freqs[code2]
        # remove code2: overwrite it with the last code, then drop the last
        if code2 + 1 < cb.shape[0]:
            cb[code2,:] = cb[-1,:]
            freqs[code2] = freqs[-1]
        cb = cb[:-1]
        freqs = freqs[:-1]
    # done
    return cb, freqs
Пример #4
0
def load_and_encode_data(codebook,pSize=4,keyInv=True,
                         downBeatInv=False,bars=1,partialbar=1,offset=0):
    """
    Fetch the dataset through get_data_maxener and quantize it with codebook.

    The codebook rows must hold pSize * 12 values (checked by an assert).
    pSize, keyInv, downBeatInv, bars, partialbar and offset are forwarded
    unchanged to get_data_maxener. The resulting patterns are matched with
    VQutils.find_best_code_per_pattern (scale=False); the best code per
    pattern it returns is not used.

    Return dists, avg_dists as given by that call
    """
    assert codebook.shape[1] == pSize * 12
    import VQutils
    # gather the loader options once, then load the normalized patterns
    loader_args = dict(pSize=pSize,keyInv=keyInv,downBeatInv=downBeatInv,
                       bars=bars,partialbar=partialbar,offset=offset)
    patterns = get_data_maxener(**loader_args)
    # quantize; only the distances are handed back
    encoded = VQutils.find_best_code_per_pattern(patterns,codebook,
                                                 scale=False)
    _codes, dists, avg_dists = encoded
    return dists, avg_dists
Пример #5
0
def encode_one_song(filename,codebook,pSize=8,keyInv=True,
                    downBeatInv=False,bars=2):
    """
    Encode every pattern of one song with the codebook.

    filename     mat file of the song, handed to the DataIterator
    codebook     numpy array, one code per row; rows are reshaped to
                 (12, pSize) below, so each must hold 12 * pSize values
    pSize        pattern size given to FU.normalize_pattern_maxenergy
                 (and to setFeatsize when bars <= 0)
    keyInv       forwarded to FU.normalize_pattern_maxenergy
    downBeatInv  forwarded to FU.normalize_pattern_maxenergy
    bars         if > 0 a pattern spans 'bars' bars, otherwise a pattern
                 is a fixed number of beats

    returns a 5-tuple:
      - best code index per pattern (int array)
      - song: the normalized patterns, one flattened pattern per row
      - encoding: the codebook row selected for each pattern
      - song as MAT: the patterns as 12 x pSize blocks joined along axis 1
      - encoding as MAT: same layout for the encoding
    matrices are 'derolled': each block is rolled back by the two roll
    values returned with the pattern (retRoll=True).
    Patterns whose sum is not > 0 are discarded. If the song has no
    pattern left, five empty lists are returned.
    """
    import feats_utils as FU
    import numpy as np
    import data_iterator
    import VQutils

    # create data iterator
    data_iter = data_iterator.DataIterator()
    data_iter.setMatfiles([filename]) # set matfiles
    if bars > 0:
        data_iter.useBars( bars )            # a pattern spans 'bars' bars
    else:
        data_iter.useBars(0)                 # important to set it to zero!
        data_iter.setFeatsize( pSize )       # a pattern is a num. of beats
    data_iter.stopAfterOnePass(True)
    # load data; with retRoll=True each item is indexed as
    # (pattern, roll along axis 0, roll along axis 1)
    normalized = [FU.normalize_pattern_maxenergy(p,pSize,keyInv,downBeatInv,retRoll=True) for p in data_iter]
    if len(normalized) == 0: # empty song
        return [],[],[],[],[]
    keyroll = np.array([x[1] for x in normalized])
    dbroll = np.array([x[2] for x in normalized])
    featsNorm = np.array([x[0].flatten() for x in normalized])
    # drop patterns without energy
    keep = np.where([np.sum(r) > 0 for r in featsNorm])
    featsNorm = featsNorm[keep]
    keyroll = keyroll[keep]
    dbroll = dbroll[keep]
    assert(dbroll.shape[0] == keyroll.shape[0])
    assert(dbroll.shape[0] == featsNorm.shape[0])
    # a song made only of silent patterns is empty too: report it the same
    # way instead of failing on the assert further down
    if featsNorm.shape[0] == 0:
        return [],[],[],[],[]
    # find code per pattern
    best_code_per_p, dists, avg_dists = VQutils.find_best_code_per_pattern(featsNorm,codebook)
    best_code_per_p = np.asarray([int(x) for x in best_code_per_p])
    assert best_code_per_p.shape[0] > 0,'empty song, we should have caught that'
    encoding = codebook[best_code_per_p]
    # transform into 2 matrices, with derolling!!!!!!!!!
    assert(featsNorm.shape[0] == encoding.shape[0])
    nPatterns = featsNorm.shape[0]

    def _derolled_mat(rows):
        # undo both rolls on every 12 x pSize block, then join the blocks
        blocks = [np.roll(np.roll(rows[x].reshape(12,pSize),-keyroll[x],axis=0),
                          -dbroll[x],axis=1)
                  for x in range(nPatterns)]
        return np.concatenate(blocks,axis=1)

    featsNormMAT = _derolled_mat(featsNorm)
    encodingMAT = _derolled_mat(encoding)
    # return
    return best_code_per_p,featsNorm,encoding,featsNormMAT,encodingMAT
Пример #6
0
def LLE_my_codebook(codebook, nNeighbors=5, nRand=5):
    """
    Performs LLE on the codebook
    Display the result: the 2D embedding is plotted as dots, and some codes
    are drawn on top of it with add_image, as 12 x patch_size images:
    nRand random ones, the leftmost, rightmost, topmost and bottommost
    points, and the point closest to the origin.
    LLE code not mine, see code for reference.

    codebook    numpy array, one code per row, row size a multiple of 12
    nNeighbors  number of neighbors given to LLE.LLE
    nRand       number of random images added
    Returns nothing, the figure is shown with P.show()
    """
    import pylab as P
    import LLE
    import numpy as np
    import VQutils as VQU
    # compute LLE, goal is 2D
    LLEres = LLE.LLE(codebook.T, nNeighbors, 2)
    # plot that result
    P.plot(LLEres[0, :], LLEres[1, :], '.')
    # hold() is gone from recent matplotlib releases (holding is always
    # on there), so only call it where it still exists
    hold = getattr(P, 'hold', None)
    if hold is not None:
        hold(True)
    # prepare to plot; floor division so that the size stays an integer
    # (reshape needs one) whatever the division semantics in use
    patch_size = codebook[0, :].size // 12

    def _draw_code(idx, *zoom):
        # draw code number idx at its position in the embedding
        add_image(P, codebook[idx, :].reshape(12, patch_size), LLEres[0, idx],
                  LLEres[1, idx], *zoom)

    # add random
    for k in range(nRand):
        _draw_code(np.random.randint(LLEres.shape[1]), .08)
    # plot extreme left codebook
    _draw_code(np.argmin(LLEres[0, :]))
    # plot extreme right codebook
    _draw_code(np.argmax(LLEres[0, :]))
    # plot extreme up codebook
    _draw_code(np.argmax(LLEres[1, :]))
    # plot extreme down codebook
    _draw_code(np.argmin(LLEres[1, :]))
    # plot middle codebook
    _draw_code(
        np.argmin([VQU.euclidean_dist(r, np.zeros(2)) for r in LLEres.T]))
    # done, release, show
    if hold is not None:
        hold(False)
    P.show()
Пример #7
0
def LLE_my_codebook(codebook,nNeighbors=5,nRand=5):
    """
    Performs LLE on the codebook
    Display the result: the 2D embedding is plotted as dots, and some codes
    are drawn on top of it with add_image, as 12 x patch_size images:
    nRand random ones, the leftmost, rightmost, topmost and bottommost
    points, and the point closest to the origin.
    LLE code not mine, see code for reference.

    codebook    numpy array, one code per row, row size a multiple of 12
    nNeighbors  number of neighbors given to LLE.LLE
    nRand       number of random images added
    Returns nothing, the figure is shown with P.show()
    """
    import pylab as P
    import LLE
    import numpy as np
    import VQutils as VQU
    # compute LLE, goal is 2D
    LLEres = LLE.LLE(codebook.T,nNeighbors,2)
    # plot that result
    P.plot(LLEres[0,:],LLEres[1,:],'.')
    # hold() is gone from recent matplotlib releases (holding is always
    # on there), so only call it where it still exists
    hold = getattr(P,'hold',None)
    if hold is not None:
        hold(True)
    # prepare to plot; floor division so that the size stays an integer
    # (reshape needs one) whatever the division semantics in use
    patch_size = codebook[0,:].size // 12

    def _draw_code(idx,*zoom):
        # draw code number idx at its position in the embedding
        add_image(P,codebook[idx,:].reshape(12,patch_size),LLEres[0,idx],LLEres[1,idx],*zoom)

    # add random
    for k in range(nRand):
        _draw_code(np.random.randint(LLEres.shape[1]),.08)
    # plot extreme left codebook
    _draw_code(np.argmin(LLEres[0,:]))
    # plot extreme right codebook
    _draw_code(np.argmax(LLEres[0,:]))
    # plot extreme up codebook
    _draw_code(np.argmax(LLEres[1,:]))
    # plot extreme down codebook
    _draw_code(np.argmin(LLEres[1,:]))
    # plot middle codebook
    _draw_code(np.argmin([VQU.euclidean_dist(r,np.zeros(2)) for r in LLEres.T]))
    # done, release, show
    if hold is not None:
        hold(False)
    P.show()
Пример #8
0
def load_and_encode_data(codebook,
                         pSize=4,
                         keyInv=True,
                         downBeatInv=False,
                         bars=1,
                         partialbar=1,
                         offset=0):
    """
    Fetch the dataset through get_data_maxener and quantize it with codebook.

    The codebook rows must hold pSize * 12 values (checked by an assert).
    pSize, keyInv, downBeatInv, bars, partialbar and offset are forwarded
    unchanged to get_data_maxener. The resulting patterns are matched with
    VQutils.find_best_code_per_pattern (scale=False); the best code per
    pattern it returns is not used.

    Return dists, avg_dists as given by that call
    """
    assert codebook.shape[1] == pSize * 12
    import VQutils
    # gather the loader options once, then load the normalized patterns
    loader_args = dict(pSize=pSize,
                       keyInv=keyInv,
                       downBeatInv=downBeatInv,
                       bars=bars,
                       partialbar=partialbar,
                       offset=offset)
    patterns = get_data_maxener(**loader_args)
    # quantize; only the distances are handed back
    encoded = VQutils.find_best_code_per_pattern(patterns,
                                                 codebook,
                                                 scale=False)
    _codes, dists, avg_dists = encoded
    return dists, avg_dists
Пример #9
0
def freqs_my_songs(filenames,codebook,pSize=8,keyInv=True,
                   downBeatInv=False,bars=2,normalize=False):
    """
    Build one code-usage histogram per song.

    Every file in filenames is encoded with encode_one_song (pSize, keyInv,
    downBeatInv and bars are forwarded to it) and the number of times each
    codebook entry gets selected is counted.

    Returns a list, in the order of filenames, of numpy arrays of shape
    (1, codebook.shape[0]). If normalize is true, the histogram of every
    non-empty song is divided by VQU.euclidean_norm of itself.
    """
    import numpy as np
    import VQutils as VQU
    n_codes = codebook.shape[0]
    histograms = []
    for song_file in filenames:
        # only the per-pattern code indices are needed here
        codes,_feats,_enc,_feats_mat,_enc_mat = encode_one_song(
            song_file,codebook,pSize=pSize,keyInv=keyInv,
            downBeatInv=downBeatInv,bars=bars)
        hist = np.zeros([1,n_codes])
        for idx in codes:
            hist[0,int(idx)] += 1
        # a song without any code keeps its all-zero histogram untouched
        if normalize and len(codes) > 0:
            hist *= 1./ VQU.euclidean_norm(hist)
        histograms.append(hist)
    return histograms
Пример #10
0
def test_align_one_song(filename, codebook):
    """
    Experiment on how well the bar alignment of a song can be recovered.

    The mat file is loaded with scipy.io.loadmat and its 'btstart',
    'barstart' and 'btchroma' entries are used. The longest run of
    consecutive 4-beat bars is located; then, for each beat offset 0..3,
    the 4-beat windows of that run are normalized, encoded with the
    codebook, and their distortion accumulated. The offset with the
    smallest accumulated distortion is the answer.

    Designed for a codebook of pSize=4, bars=1
    If song has non 4 beats patterns, problem

    filename  path of the mat file of the song
    codebook  codebook handed to VQU.encode_oneiter

    Return is complex:
      - -1      if could not perform test (fewer than 3 beats or bars,
                malformed beat/bar data, or longest 4-beat run shorter
                than 10 bars)
      - 0       if test successful
      - 1-2-3   by how many beats we missed
    """

    import scipy
    import scipy.io
    import numpy as np
    import feats_utils as FU
    import VQutils as VQU

    # NOTE(review): the doubled `mat = mat =` is redundant but harmless
    mat = mat = scipy.io.loadmat(filename)
    btstart = mat['btstart']
    barstart = mat['barstart']
    try:
        btstart = btstart.flatten()
        barstart = barstart.flatten()
        if btstart.shape[0] < 3 or barstart.shape[0] < 3:
            return -1  # can not complete
    except IndexError:
        print 'index error'
        return -1  # can not complete
    except AttributeError:
        return -1  # can not complete
    # find bar start based on beat index: for each bar start, the index of
    # the first beat with exactly the same value (IndexError if none)
    barstart_idx = [np.where(btstart == x)[0][0] for x in barstart]
    # sentinel: the number of beats closes the last bar
    barstart_idx.append(btstart.shape[0])
    # find bar lengths, in beats
    barlengths = np.diff(barstart_idx)
    # find not4 elems: indices of the bars that do not have 4 beats,
    # bracketed by 0 and the number of bars. Index 0 is always a boundary,
    # so the first bar is never part of a run (pos1 below is >= 1).
    not4 = np.where(barlengths != 4)[0]
    not4 = np.concatenate([[0], not4, [len(barlengths)]])
    # find longest sequence of bars of length 4 beats: the gap between two
    # consecutive boundaries, minus the boundary bar itself
    seqs_of_4 = np.diff(not4)
    # NOTE(review): not4 has at least 2 elements after the concatenation,
    # so this test is always true and the else branch is never taken
    if len(not4) > 1:
        longest_seq_length = np.max(seqs_of_4) - 1
    else:
        longest_seq_length = not4[0]
    if longest_seq_length < 10:  # minimum run length, arbitrary threshold
        #print 'return because longest seq has length:',longest_seq_length
        return -1  # can not complete
    # find best seq pos: first bar after the opening boundary, and the
    # closing boundary (excluded)
    pos1 = not4[np.argmax(seqs_of_4)] + 1
    pos2 = not4[np.argmax(seqs_of_4) + 1]
    # longest sequence should be in range(pos1,pos2)
    # sanity checks
    assert pos2 - pos1 == longest_seq_length
    for k in range(pos1, pos2):
        assert barlengths[k] == 4
    # position in beats: first beat of the run, and first beat after it
    beat_pos_1 = barstart_idx[pos1]
    beat_pos_2 = beat_pos_1 + 4 * longest_seq_length
    # the run must end at the end of the song or on a bar start.
    # NOTE(review): barstart_idx is a plain list; the == is only
    # elementwise if beat_pos_2 is a numpy integer -- confirm
    assert beat_pos_2 == btstart.shape[0] or np.where(
        barstart_idx == beat_pos_2)[0].shape[0] > 0
    # load actual beat features
    btchroma = mat['btchroma']
    # try everything: offset 0 to 3
    best_offset = -1
    best_avg_dist = np.inf
    for offset in range(4):
        # despite its name this is a sum over the windows, not a mean;
        # every offset uses the same number of windows
        avg_dist = 0
        # one bar less than the run, so that a shifted 4-beat window
        # never goes beyond beat_pos_2
        for baridx in range(longest_seq_length - 1):
            pos = beat_pos_1 + offset + baridx * 4
            feats = btchroma[:, pos:pos + 4]
            featsNorm = FU.normalize_pattern_maxenergy(feats,
                                                       newsize=4,
                                                       keyinvariant=True,
                                                       downbeatinvariant=False)
            # measure with codebook: squared first distance returned by
            # encode_oneiter, divided by the number of values of the pattern
            tmp, dists = VQU.encode_oneiter(featsNorm.flatten(), codebook)
            avg_dist += (dists[0] * dists[0]) * 1. / featsNorm.size
        #print 'avg_dist=',avg_dist,'for offset',offset
        # strict comparison: on ties the smallest offset is kept
        if best_avg_dist > avg_dist:
            best_avg_dist = avg_dist
            best_offset = offset
    # done, return offset, which is 0 if fine
    return best_offset
Пример #11
0
def knn_from_freqs_on_artists(filenames,
                              codebook,
                              pSize=8,
                              keyInv=True,
                              downBeatInv=False,
                              bars=2,
                              normalize=True,
                              confMatrix=True,
                              use_l0_dist=False,
                              use_artists=False):
    """
    Performs a leave-one-out experiment where we try to guess the artist
    of a song from its nearest neighbor in code frequencies.
    Distances come from VQU.euclidean_dist, or from l0_dist when
    use_l0_dist is true.

    filenames are expected to be: */artist/album/*.mat
    codebook, pSize, keyInv, downBeatInv, bars, normalize are forwarded to
    freqs_my_songs.
    if confMatrix=True, plot it.
    if use_artists, songs are matched to artists, not to other songs: each
    artist is represented by the mean frequencies of its songs, the tested
    song being left out of its own artist.

    Songs without any code (empty or all-zero frequencies) are ignored.
    A song is counted as an experiment but is neither scored as correct nor
    entered in the confusion matrix when the right answer is not reachable
    (no other song of its artist among the candidates).
    Accuracies are printed as nan when no experiment could be run.

    RETURNS:
    - confusion matrix (rows: real artist, columns: predicted artist)
    - freqs per file
    - artist per file
    """
    import numpy as np
    import os
    import VQutils as VQU
    import time

    def _is_empty(f):
        # freqs_my_songs gives an all-zero histogram to a song without
        # codes; a zero-size entry is accepted as 'empty' as well
        return np.size(f) == 0 or not np.any(f)

    def _dist(a, b):
        # the distance selected by use_l0_dist
        if use_l0_dist:
            return l0_dist(a, b)
        return VQU.euclidean_dist(a, b)

    nCodes = codebook.shape[0]
    # get frequencies for all songs
    tstart = time.time()
    freqs = freqs_my_songs(filenames,
                           codebook,
                           pSize=pSize,
                           keyInv=keyInv,
                           downBeatInv=downBeatInv,
                           bars=bars,
                           normalize=normalize)
    # single-argument prints: same output under Python 2 and Python 3
    print('all frequencies computed in %s seconds.' % (time.time() - tstart))
    empty = [_is_empty(f) for f in freqs]
    # get artists for all songs: the grand-parent directory of each file
    artists = np.array([
        os.path.basename(os.path.dirname(os.path.dirname(f)))
        for f in filenames
    ])
    # names of artists (np.unique returns them sorted)
    artist_names = np.unique(artists)
    nArtists = artist_names.shape[0]
    nFiles = len(filenames)
    tstart = time.time()
    if not use_artists:
        # compute distance between all songs; inf on the diagonal and for
        # every pair involving an empty song
        dists = np.zeros([nFiles, nFiles])
        for l in range(nFiles):
            dists[l, l] = np.inf
            for c in range(l + 1, nFiles):
                if empty[l] or empty[c]:
                    d = np.inf
                else:
                    d = _dist(freqs[l], freqs[c])
                # keep the matrix symmetric whatever the distance used
                dists[l, c] = d
                dists[c, l] = d
    else:
        # create a matrix songs * nArtists
        dists = np.zeros([nFiles, nArtists])
        # precompute cntArtists and artistFreqs, not normalized
        cntArtists = {}
        artistFreqs = {}
        for k in artist_names:
            cntArtists[k] = 0
            artistFreqs[k] = np.zeros([1, nCodes])
        for k in range(nFiles):
            if empty[k]:
                continue
            cntArtists[artists[k]] += 1
            artistFreqs[artists[k]] += freqs[k]
        # iterate over files
        for l in range(nFiles):
            if empty[l]:
                dists[l, :] = np.inf
                continue
            for c in range(nArtists):
                art = artist_names[c]
                cnt = cntArtists[art]
                total = artistFreqs[art]
                if art == artists[l]:
                    # leave the current song out of its own artist
                    cnt -= 1
                    total = total - freqs[l]
                if cnt == 0:
                    # no song left for this artist: not a candidate
                    # (dividing by the count would fail)
                    dists[l, c] = np.inf
                    continue
                dists[l, c] = _dist(freqs[l], total * (1. / cnt))
    print('distances computed in %s seconds.' % (time.time() - tstart))
    # confusion matrix
    confMat = np.zeros([nArtists, nArtists])
    # performs leave-one-out KNN
    nExps = 0
    nGood = 0
    randScore = 0  # sums prob of having it right by luck, must divide by nExps
    for songid in range(nFiles):
        if empty[songid]:
            continue
        # get close matches ordered, remove inf
        row = dists[songid, :]
        orderedMatches = np.argsort(row)
        orderedMatches = orderedMatches[row[orderedMatches] != np.inf]
        nMatches = orderedMatches.shape[0]
        # artist
        artist = artists[songid]
        real_artist_id = np.where(artist_names == artist)[0][0]
        # get stats
        nExps += 1
        if not use_artists:
            nGoodMatches = np.where(
                artists[orderedMatches] == artist)[0].shape[0]
        else:
            nGoodMatches = np.where(
                orderedMatches == real_artist_id)[0].shape[0]
        if nGoodMatches == 0:
            continue  # the right answer is not among the candidates
        randScore += nGoodMatches * 1. / nMatches
        if not use_artists:
            pred_artist = artists[orderedMatches[0]]
        else:
            pred_artist = artist_names[orderedMatches[0]]
        if pred_artist == artist:
            nGood += 1
        # fill confusion matrix
        pred_artist_id = np.where(artist_names == pred_artist)[0][0]
        print('%s ) real artist: %s id= %s , pred artist: %s id= %s' %
              (songid, artist, real_artist_id, pred_artist, pred_artist_id))
        confMat[real_artist_id, pred_artist_id] += 1
    # done, print out
    if nExps > 0:
        randAccuracy = randScore * 1. / nExps
        accuracy = nGood * 1. / nExps
    else:
        randAccuracy = accuracy = float('nan')
    print('nExps: %s' % nExps)
    print('rand accuracy: %s' % randAccuracy)
    print('accuracy: %s' % accuracy)
    # plot confusion matrix
    if confMatrix:
        short_names = np.array([x[:2] for x in artist_names])
        import pylab as P
        P.imshow(confMat,
                 interpolation='nearest',
                 cmap=P.cm.gray_r,
                 origin='lower')
        P.yticks(P.arange(artist_names.shape[0]), list(artist_names))
        P.xticks(P.arange(artist_names.shape[0]), list(short_names))
        P.title('confusion matrix (real/predicted)')
        P.ylabel('TRUE')
        P.xlabel('RECOG')
        P.colorbar()
    # return confusion matrix
    return confMat, freqs, artists
Пример #12
0
def encode_one_song(filename,
                    codebook,
                    pSize=8,
                    keyInv=True,
                    downBeatInv=False,
                    bars=2):
    """
    Encode every pattern of one song with the codebook.

    filename     mat file of the song, handed to the DataIterator
    codebook     numpy array, one code per row; rows are reshaped to
                 (12, pSize) below, so each must hold 12 * pSize values
    pSize        pattern size given to FU.normalize_pattern_maxenergy
                 (and to setFeatsize when bars <= 0)
    keyInv       forwarded to FU.normalize_pattern_maxenergy
    downBeatInv  forwarded to FU.normalize_pattern_maxenergy
    bars         if > 0 a pattern spans 'bars' bars, otherwise a pattern
                 is a fixed number of beats

    returns a 5-tuple:
      - best code index per pattern (int array)
      - song: the normalized patterns, one flattened pattern per row
      - encoding: the codebook row selected for each pattern
      - song as MAT: the patterns as 12 x pSize blocks joined along axis 1
      - encoding as MAT: same layout for the encoding
    matrices are 'derolled': each block is rolled back by the two roll
    values returned with the pattern (retRoll=True).
    Patterns whose sum is not > 0 are discarded. If the song has no
    pattern left, five empty lists are returned.
    """
    import feats_utils as FU
    import numpy as np
    import data_iterator
    import VQutils

    # create data iterator
    data_iter = data_iterator.DataIterator()
    data_iter.setMatfiles([filename])  # set matfiles
    if bars > 0:
        data_iter.useBars(bars)  # a pattern spans 'bars' bars
    else:
        data_iter.useBars(0)  # important to set it to zero!
        data_iter.setFeatsize(pSize)  # a pattern is a num. of beats
    data_iter.stopAfterOnePass(True)
    # load data; with retRoll=True each item is indexed as
    # (pattern, roll along axis 0, roll along axis 1)
    normalized = [
        FU.normalize_pattern_maxenergy(p,
                                       pSize,
                                       keyInv,
                                       downBeatInv,
                                       retRoll=True) for p in data_iter
    ]
    if len(normalized) == 0:  # empty song
        return [], [], [], [], []
    keyroll = np.array([x[1] for x in normalized])
    dbroll = np.array([x[2] for x in normalized])
    featsNorm = np.array([x[0].flatten() for x in normalized])
    # drop patterns without energy
    keep = np.where([np.sum(r) > 0 for r in featsNorm])
    featsNorm = featsNorm[keep]
    keyroll = keyroll[keep]
    dbroll = dbroll[keep]
    assert (dbroll.shape[0] == keyroll.shape[0])
    assert (dbroll.shape[0] == featsNorm.shape[0])
    # a song made only of silent patterns is empty too: report it the same
    # way instead of failing on the assert further down
    if featsNorm.shape[0] == 0:
        return [], [], [], [], []
    # find code per pattern
    best_code_per_p, dists, avg_dists = VQutils.find_best_code_per_pattern(
        featsNorm, codebook)
    best_code_per_p = np.asarray([int(x) for x in best_code_per_p])
    assert best_code_per_p.shape[
        0] > 0, 'empty song, we should have caught that'
    encoding = codebook[best_code_per_p]
    # transform into 2 matrices, with derolling!!!!!!!!!
    assert (featsNorm.shape[0] == encoding.shape[0])
    nPatterns = featsNorm.shape[0]

    def _derolled_mat(rows):
        # undo both rolls on every 12 x pSize block, then join the blocks
        blocks = [
            np.roll(np.roll(rows[x].reshape(12, pSize), -keyroll[x], axis=0),
                    -dbroll[x],
                    axis=1) for x in range(nPatterns)
        ]
        return np.concatenate(blocks, axis=1)

    featsNormMAT = _derolled_mat(featsNorm)
    encodingMAT = _derolled_mat(encoding)
    # return
    return best_code_per_p, featsNorm, encoding, featsNormMAT, encodingMAT
Пример #13
0
def test_align_one_song(filename,codebook):
    """
    Experiment on how well the bar alignment of a song can be recovered.

    The mat file is loaded with scipy.io.loadmat and its 'btstart',
    'barstart' and 'btchroma' entries are used. The longest run of
    consecutive 4-beat bars is located; then, for each beat offset 0..3,
    the 4-beat windows of that run are normalized, encoded with the
    codebook, and their distortion accumulated. The offset with the
    smallest accumulated distortion is the answer.

    Designed for a codebook of pSize=4, bars=1
    If song has non 4 beats patterns, problem

    filename  path of the mat file of the song
    codebook  codebook handed to VQU.encode_oneiter

    Return is complex:
      - -1      if could not perform test (fewer than 3 beats or bars,
                malformed beat/bar data, or longest 4-beat run shorter
                than 10 bars)
      - 0       if test successful
      - 1-2-3   by how many beats we missed
    """

    import scipy
    import scipy.io
    import numpy as np
    import feats_utils as FU
    import VQutils as VQU

    # NOTE(review): the doubled `mat = mat =` is redundant but harmless
    mat = mat = scipy.io.loadmat(filename)
    btstart = mat['btstart']
    barstart = mat['barstart']
    try:
        btstart = btstart.flatten()
        barstart = barstart.flatten()
        if btstart.shape[0] < 3 or barstart.shape[0] < 3:
            return -1 # can not complete
    except IndexError:
        print 'index error'
        return -1 # can not complete
    except AttributeError:
        return -1 # can not complete
    # find bar start based on beat index: for each bar start, the index of
    # the first beat with exactly the same value (IndexError if none)
    barstart_idx = [np.where(btstart==x)[0][0] for x in barstart]
    # sentinel: the number of beats closes the last bar
    barstart_idx.append(btstart.shape[0])
    # find bar lengths, in beats
    barlengths = np.diff(barstart_idx)
    # find not4 elems: indices of the bars that do not have 4 beats,
    # bracketed by 0 and the number of bars. Index 0 is always a boundary,
    # so the first bar is never part of a run (pos1 below is >= 1).
    not4 = np.where(barlengths!=4)[0]
    not4 = np.concatenate([[0],not4,[len(barlengths)]])
    # find longest sequence of bars of length 4 beats: the gap between two
    # consecutive boundaries, minus the boundary bar itself
    seqs_of_4 = np.diff(not4)
    # NOTE(review): not4 has at least 2 elements after the concatenation,
    # so this test is always true and the else branch is never taken
    if len(not4)>1:
        longest_seq_length = np.max(seqs_of_4) -1
    else:
        longest_seq_length = not4[0]
    if longest_seq_length < 10: # minimum run length, arbitrary threshold
        #print 'return because longest seq has length:',longest_seq_length
        return -1 # can not complete
    # find best seq pos: first bar after the opening boundary, and the
    # closing boundary (excluded)
    pos1 = not4[np.argmax(seqs_of_4)]+1
    pos2 = not4[np.argmax(seqs_of_4)+1]
    # longest sequence should be in range(pos1,pos2)
    # sanity checks
    assert pos2 - pos1 == longest_seq_length
    for k in range(pos1,pos2):
        assert barlengths[k] == 4
    # position in beats: first beat of the run, and first beat after it
    beat_pos_1 = barstart_idx[pos1]
    beat_pos_2 = beat_pos_1 + 4 * longest_seq_length
    # the run must end at the end of the song or on a bar start.
    # NOTE(review): barstart_idx is a plain list; the == is only
    # elementwise if beat_pos_2 is a numpy integer -- confirm
    assert beat_pos_2 == btstart.shape[0] or np.where(barstart_idx==beat_pos_2)[0].shape[0]>0
    # load actual beat features
    btchroma = mat['btchroma']
    # try everything: offset 0 to 3
    best_offset = -1
    best_avg_dist = np.inf
    for offset in range(4):
        # despite its name this is a sum over the windows, not a mean;
        # every offset uses the same number of windows
        avg_dist = 0
        # one bar less than the run, so that a shifted 4-beat window
        # never goes beyond beat_pos_2
        for baridx in range(longest_seq_length-1):
            pos = beat_pos_1 + offset + baridx * 4
            feats = btchroma[:,pos:pos+4]
            featsNorm = FU.normalize_pattern_maxenergy(feats,newsize=4,
                                                       keyinvariant=True,
                                                       downbeatinvariant=False)
            # measure with codebook: squared first distance returned by
            # encode_oneiter, divided by the number of values of the pattern
            tmp,dists = VQU.encode_oneiter(featsNorm.flatten(),codebook)
            avg_dist += (dists[0] * dists[0]) * 1. / featsNorm.size
        #print 'avg_dist=',avg_dist,'for offset',offset
        # strict comparison: on ties the smallest offset is kept
        if best_avg_dist > avg_dist:
            best_avg_dist = avg_dist
            best_offset = offset
    # done, return offset, which is 0 if fine
    return best_offset
Пример #14
0
def knn_from_freqs_on_artists(filenames,codebook,pSize=8,keyInv=True,
                              downBeatInv=False,bars=2,normalize=True,
                              confMatrix=True,use_l0_dist=False,use_artists=False):
    """
    Performs a leave-one-out experiment where we try to guess the artist
    of a song from its nearest neighbor in code frequencies.
    Distances come from VQU.euclidean_dist, or from l0_dist when
    use_l0_dist is true.

    filenames are expected to be: */artist/album/*.mat
    codebook, pSize, keyInv, downBeatInv, bars, normalize are forwarded to
    freqs_my_songs.
    if confMatrix=True, plot it.
    if use_artists, songs are matched to artists, not to other songs: each
    artist is represented by the mean frequencies of its songs, the tested
    song being left out of its own artist.

    Songs without any code (empty or all-zero frequencies) are ignored.
    A song is counted as an experiment but is neither scored as correct nor
    entered in the confusion matrix when the right answer is not reachable
    (no other song of its artist among the candidates).
    Accuracies are printed as nan when no experiment could be run.

    RETURNS:
    - confusion matrix (rows: real artist, columns: predicted artist)
    - freqs per file
    - artist per file
    """
    import numpy as np
    import os
    import VQutils as VQU
    import time

    def _is_empty(f):
        # freqs_my_songs gives an all-zero histogram to a song without
        # codes; a zero-size entry is accepted as 'empty' as well
        return np.size(f) == 0 or not np.any(f)

    def _dist(a,b):
        # the distance selected by use_l0_dist
        if use_l0_dist:
            return l0_dist(a,b)
        return VQU.euclidean_dist(a,b)

    nCodes = codebook.shape[0]
    # get frequencies for all songs
    tstart = time.time()
    freqs = freqs_my_songs(filenames,codebook,pSize=pSize,keyInv=keyInv,
                           downBeatInv=downBeatInv,bars=bars,
                           normalize=normalize)
    # single-argument prints: same output under Python 2 and Python 3
    print('all frequencies computed in %s seconds.' % (time.time()-tstart))
    empty = [_is_empty(f) for f in freqs]
    # get artists for all songs: the grand-parent directory of each file
    artists = np.array([os.path.basename(os.path.dirname(os.path.dirname(f)))
                        for f in filenames])
    # names of artists (np.unique returns them sorted)
    artist_names = np.unique(artists)
    nArtists = artist_names.shape[0]
    nFiles = len(filenames)
    tstart = time.time()
    if not use_artists:
        # compute distance between all songs; inf on the diagonal and for
        # every pair involving an empty song
        dists = np.zeros([nFiles,nFiles])
        for l in range(nFiles):
            dists[l,l] = np.inf
            for c in range(l+1,nFiles):
                if empty[l] or empty[c]:
                    d = np.inf
                else:
                    d = _dist(freqs[l],freqs[c])
                # keep the matrix symmetric whatever the distance used
                dists[l,c] = d
                dists[c,l] = d
    else:
        # create a matrix songs * nArtists
        dists = np.zeros([nFiles,nArtists])
        # precompute cntArtists and artistFreqs, not normalized
        cntArtists = {}
        artistFreqs = {}
        for k in artist_names:
            cntArtists[k] = 0
            artistFreqs[k] = np.zeros([1,nCodes])
        for k in range(nFiles):
            if empty[k]:
                continue
            cntArtists[artists[k]] += 1
            artistFreqs[artists[k]] += freqs[k]
        # iterate over files
        for l in range(nFiles):
            if empty[l]:
                dists[l,:] = np.inf
                continue
            for c in range(nArtists):
                art = artist_names[c]
                cnt = cntArtists[art]
                total = artistFreqs[art]
                if art == artists[l]:
                    # leave the current song out of its own artist
                    cnt -= 1
                    total = total - freqs[l]
                if cnt == 0:
                    # no song left for this artist: not a candidate
                    # (dividing by the count would fail)
                    dists[l,c] = np.inf
                    continue
                dists[l,c] = _dist(freqs[l],total * (1. / cnt))
    print('distances computed in %s seconds.' % (time.time()-tstart))
    # confusion matrix
    confMat = np.zeros([nArtists,nArtists])
    # performs leave-one-out KNN
    nExps = 0
    nGood = 0
    randScore = 0 # sums prob of having it right by luck, must divide by nExps
    for songid in range(nFiles):
        if empty[songid]:
            continue
        # get close matches ordered, remove inf
        row = dists[songid,:]
        orderedMatches = np.argsort(row)
        orderedMatches = orderedMatches[row[orderedMatches] != np.inf]
        nMatches = orderedMatches.shape[0]
        # artist
        artist = artists[songid]
        real_artist_id = np.where(artist_names==artist)[0][0]
        # get stats
        nExps += 1
        if not use_artists:
            nGoodMatches = np.where(artists[orderedMatches]==artist)[0].shape[0]
        else:
            nGoodMatches = np.where(orderedMatches==real_artist_id)[0].shape[0]
        if nGoodMatches == 0:
            continue # the right answer is not among the candidates
        randScore += nGoodMatches * 1. / nMatches
        if not use_artists:
            pred_artist = artists[orderedMatches[0]]
        else:
            pred_artist = artist_names[orderedMatches[0]]
        if pred_artist == artist:
            nGood += 1
        # fill confusion matrix
        pred_artist_id = np.where(artist_names==pred_artist)[0][0]
        print('%s ) real artist: %s id= %s , pred artist: %s id= %s' %
              (songid,artist,real_artist_id,pred_artist,pred_artist_id))
        confMat[real_artist_id,pred_artist_id] += 1
    # done, print out
    if nExps > 0:
        randAccuracy = randScore*1./nExps
        accuracy = nGood*1./nExps
    else:
        randAccuracy = accuracy = float('nan')
    print('nExps: %s' % nExps)
    print('rand accuracy: %s' % randAccuracy)
    print('accuracy: %s' % accuracy)
    # plot confusion matrix
    if confMatrix:
        short_names = np.array([x[:2] for x in artist_names])
        import pylab as P
        P.imshow(confMat,interpolation='nearest',cmap=P.cm.gray_r,
                 origin='lower')
        P.yticks(P.arange(artist_names.shape[0]),list(artist_names))
        P.xticks(P.arange(artist_names.shape[0]),list(short_names))
        P.title('confusion matrix (real/predicted)')
        P.ylabel('TRUE')
        P.xlabel('RECOG')
        P.colorbar()
    # return confusion matrix
    return confMat,freqs,artists
Пример #15
0
def do_experiment(experiment_dir, beats, bars, nCodes, nSamples=0,
                  useFirsts=False, seed=0, offset=0, partialbar=1,
                  keyinv=True):
    """
    Performs an independent experiment: trains (or reloads) a codebook,
    encodes the test data with it and writes a short report.

    Files written into experiment_dir:
      - args.mat      the arguments of this call
      - codebook.mat  trained codebook and training distances
                      (only written when it does not exist yet)
      - test.mat      'dists' and 'avg_dists' measured on the test data
      - report.txt    the textual report, which is also printed

    INPUT
      experiment_dir  output directory, created if needed
      beats           passed as pSize to the data loading / encoding
      bars            passed as bars to the data loading / encoding
      nCodes          number of codes requested from VQutils.online_vq
      nSamples        number of training samples to keep, 0 means all
      useFirsts       if True keep the first nSamples samples (then
                      shuffle them), otherwise keep a random subset
      seed            seed given to np.random.seed
      offset          forwarded to demos.get_data_maxener
      partialbar      forwarded to demos.get_data_maxener
      keyinv          passed as keyInv to the data loading / encoding

    Relies on the module-level names featsDir and testFeatsDir and
    changes the current working directory to them; the working
    directory is not restored.
    Returns nothing.
    """
    # resolve once: the os.chdir calls below would otherwise make a
    # relative experiment_dir point somewhere else
    exp_dir = os.path.abspath(experiment_dir)
    try:
        os.makedirs(exp_dir)
    except OSError:
        # an already existing directory is fine, anything else is an error
        if not os.path.isdir(exp_dir):
            raise

    np.random.seed(seed)

    args = dict(experiment_dir=experiment_dir, beats=beats, bars=bars,
                nCodes=nCodes, nSamples=nSamples, useFirsts=useFirsts,
                seed=seed, offset=offset, partialbar=partialbar,
                keyinv=keyinv)
    sp.io.savemat(os.path.join(exp_dir, 'args.mat'), args)

    # TRAINING
    # go to the folder of features (per beat)
    os.chdir(featsDir)

    codebook_path = os.path.join(exp_dir, 'codebook.mat')
    if not os.path.exists(codebook_path):
        # load everything, never downbeat invariant
        featsNorm = demos.get_data_maxener(pSize=beats, keyInv=keyinv,
                                           downBeatInv=False, bars=bars,
                                           offset=offset,
                                           partialbar=partialbar)

        # select nSamples samples out of it
        if nSamples == 0:
            nSamples = featsNorm.shape[0]
        if useFirsts:
            # keep the first ones, but still randomize their order
            featsNorm = featsNorm[:nSamples]
            np.random.shuffle(featsNorm)
        else:
            r = list(range(featsNorm.shape[0]))
            np.random.shuffle(r)
            featsNorm = featsNorm[r[:nSamples]]

        # train a codebook of size nCodes
        codebook, train_dists = VQutils.online_vq(featsNorm, nCodes,
                                                  lrate=1e-2, nIter=200)
        sp.io.savemat(codebook_path,
                      dict(codebook=codebook, dists=train_dists))

        del featsNorm
    else:
        # reuse the existing codebook; the stored training distances
        # are not needed below
        mat = sp.io.loadmat(codebook_path)
        codebook = mat['codebook']

    # TESTING
    # go to the folder of test features (per beat)
    os.chdir(testFeatsDir)

    # load and test
    test_dists, avg_dists = demos.load_and_encode_data(codebook, pSize=beats,
                                                       keyInv=keyinv,
                                                       downBeatInv=False,
                                                       bars=bars)
    sp.io.savemat(os.path.join(exp_dir, 'test.mat'),
                  dict(dists=test_dists, avg_dists=avg_dists))

    # report result (average of avg_dists)
    # with print outs to know what we are doing
    report = ['EXPERIMENT REPORT ******************************',
              'beats: %s , bars: %s , nCodes: %s , nSamples: %s , offset: %s , partialbar: %s'
              % (beats, bars, nCodes, nSamples, offset, partialbar)]
    if useFirsts:
        report.append('we use firsts %s samples' % nSamples)
    if not keyinv:
        report.append('not key invariant!')
    report.extend(['np.average(avg_dists): %s' % np.average(avg_dists),
                   '************************************************', ''])
    reportstr = '\n'.join(report)
    # single-argument form prints the same text under the print statement
    print(reportstr)
    with open(os.path.join(exp_dir, 'report.txt'), 'w') as f:
        f.write(reportstr)