Пример #1
0
def analyze_all(V, dfun=distance.euc_normed, dfw=True, **kwargs):
    """
    Perform DTW(+DFW) timbre analysis on list of audio files, return pair-wise costs
    inputs:
       V       - vowel analysis pool
       dfun    - distance function to use [bregman.distance.euc_normed]
       dfw     - whether to perform dynamic FREQUENCY warping (dfw) after DTW [True]
       **kwargs  - arguments to dpcore.dp DTW algorithm
    outputs:
       Z - pair-wise cost matrix (timbre time/frequency warp) between audio files
    """
    Z = zeros((len(V),len(V)))
    kwargs['penalty'] = kwargs.pop('penalty',0.1)    
    kwargs['gutter'] = kwargs.pop('gutter',0.0)
    for i,a in enumerate(V):
        for j,b in enumerate(V):
        	D = dfun(a['mfccs'],b['mfccs'])
        	p,q,c,phi = dpcore.dp(D, **kwargs)
        	if not dfw:
        		Z[i,j] = diag(dfun(a['lqft'][p],b['lqft'][q])).mean()
        	else:
	        	alpha = optimal_vtln(b['lqft'][q], a['lqft'][p], 'symmetric')
	        	print "Optimal DFW (VTLN) warp alpha=", alpha
	        	Vi_warped = vtln(b['lqft'][q], 'symmetric', alpha)
	        	Z[i,j] = diag(dfun(Vi_warped,b['lqft'][q])).mean()
    return Z
Пример #2
0
def tsne_all(V, ref_idx=9, dfun=distance.euc_normed, dfw=True, tsne=True, perplexity=3, plotting=False, **kwargs):
    """
    Perform tsne / DTW / DFW timbre analysis on list of audio files, return pair-wise costs
    Uses tsne library
    inputs:
       V 	   - vowel analysis pool
    ref_idx    - reference audio file, all audio warped to this [9]
       dfun    - distance function to use [bregman.distance.euc_normed]
       dfw     - whether to perform dynamic FREQUENCY warping (dfw) after DTW [True]    
       tsne    - whether to return t-SNE or just DTW spectrum [True]  
    perplexity - how many near neighbours in t-SNE [3]
    plotting   - whether to plot the t-SNE result [False]
       **kwargs  - arguments to dpcore.dp DTW algorithm    
    outputs:
       Z - 2D projection map of audio files
    """
    Z = []
    kwargs['penalty'] = kwargs.pop('penalty',0.1)    
    kwargs['gutter'] = kwargs.pop('gutter',0.0)
    for i in xrange(len(V)):
        C = dfun(V[ref_idx]['mfccs'],V[i]['mfccs'])
        p,q,c,phi = dpcore.dp(C, **kwargs)
        if not dfw:
            Z.append(V[i]['lqft'][q])
        else:
            alpha = optimal_vtln(V[i]['lqft'][q], V[ref_idx]['lqft'][p], 'symmetric')
            print "Optimal DFW (VTLN) warp alpha=", alpha
            Vi_warped = vtln(V[i]['lqft'][q], 'symmetric', alpha)
            Z.append(Vi_warped)
    if tsne:
        X = array([zz.flatten() for zz in Z], dtype='f8')
        Z = _tsne(X, perplexity=perplexity, plotting=plotting)
    return Z
Пример #3
0
def analyze_all(V, dfun=distance.euc_normed, dfw=True, **kwargs):
    """
    Perform DTW(+DFW) timbre analysis on list of audio files, return pair-wise costs
    inputs:
       V       - vowel analysis pool
       dfun    - distance function to use [bregman.distance.euc_normed]
       dfw     - whether to perform dynamic FREQUENCY warping (dfw) after DTW [True]
       **kwargs  - arguments to dpcore.dp DTW algorithm
    outputs:
       Z - pair-wise cost matrix (timbre time/frequency warp) between audio files
    """
    Z = zeros((len(V), len(V)))
    kwargs['penalty'] = kwargs.pop('penalty', 0.1)
    kwargs['gutter'] = kwargs.pop('gutter', 0.0)
    for i, a in enumerate(V):
        for j, b in enumerate(V):
            D = dfun(a['mfccs'], b['mfccs'])
            p, q, c, phi = dpcore.dp(D, **kwargs)
            if not dfw:
                Z[i, j] = diag(dfun(a['lqft'][p], b['lqft'][q])).mean()
            else:
                alpha = optimal_vtln(b['lqft'][q], a['lqft'][p], 'symmetric')
                print "Optimal DFW (VTLN) warp alpha=", alpha
                Vi_warped = vtln(b['lqft'][q], 'symmetric', alpha)
                Z[i, j] = diag(dfun(Vi_warped, b['lqft'][q])).mean()
    return Z
Пример #4
0
def dtw(u, v):
    """
    Return the last entry of the cost matrix produced by dp for u versus v.

    u and v are given a trailing axis before the pairwise Euclidean distance
    matrix is built with cdist; that matrix is handed to dp and the
    bottom-right element of the third returned value is the result.

    Side effect: uses the module-level counter `i` (which must already
    exist), printing it whenever it is a multiple of 10000 and then
    incrementing it.
    """
    global i

    u_col = u[:, np.newaxis]
    v_col = v[:, np.newaxis]
    pairwise = cdist(u_col, v_col, metric='euclidean')
    _, _, cost_matrix, _ = dp(pairwise)

    # Progress indicator across calls.
    if not i % 10000:
        print(i)
    i = i + 1

    return cost_matrix[-1, -1]
Пример #5
0
def align_vowels(X, Y, dfun=distance.euc_normed, penalty=0.1, gutter=0.1):
    """
    DTW align vowels based on spectral data in X and Y
    Uses dpcore dynamic programming library
    inputs:
      X, Y    - spectral data in rows (num columns must be equal)
      dfun    - distance function whose result is handed to dpcore.dp
                [distance.euc_normed]
      penalty - forwarded to dpcore.dp as 'penalty' [0.1]
      gutter  - forwarded to dpcore.dp as 'gutter' [0.1]
    outputs:
      dict with keys 'p', 'q', 'C' - Dynamic Time Warp coefs: X[p]<-->Y[q],
      'C' is the third value returned by dpcore.dp (cost)
    """
    Df = dfun(X, Y)
    # The fourth value returned by dpcore.dp is not part of the result.
    p, q, C, _ = dpcore.dp(Df, penalty=penalty, gutter=gutter)
    return {'p': p, 'q': q, 'C': C}
Пример #6
0
def align_vowels(X, Y, dfun=distance.euc_normed, penalty=0.1, gutter=0.1):
    """
    DTW align vowels based on spectral data in X and Y
    Uses dpcore dynamic programming library
    inputs:
      X, Y    - spectral data in rows (num columns must be equal)
      dfun    - distance function whose result is handed to dpcore.dp
                [distance.euc_normed]
      penalty - 'penalty' keyword passed to dpcore.dp [0.1]
      gutter  - 'gutter' keyword passed to dpcore.dp [0.1]
    outputs:
      dict {'p': p, 'q': q, 'C': C} - Dynamic Time Warp coefs: X[p]<-->Y[q],
      C is the third value returned by dpcore.dp (cost)
    """
    distances = dfun(X, Y)
    # Only the first three values returned by dpcore.dp are reported.
    p, q, C, _ = dpcore.dp(distances, penalty=penalty, gutter=gutter)
    return {'p': p, 'q': q, 'C': C}
Пример #7
0
def tsne_all(V,
             ref_idx=9,
             dfun=distance.euc_normed,
             dfw=True,
             tsne=True,
             perplexity=3,
             plotting=False,
             **kwargs):
    """
    Perform tsne / DTW / DFW timbre analysis on list of audio files, return pair-wise costs
    Uses tsne library
    inputs:
       V 	   - vowel analysis pool
    ref_idx    - reference audio file, all audio warped to this [9]
       dfun    - distance function to use [bregman.distance.euc_normed]
       dfw     - whether to perform dynamic FREQUENCY warping (dfw) after DTW [True]    
       tsne    - whether to return t-SNE or just DTW spectrum [True]  
    perplexity - how many near neighbours in t-SNE [3]
    plotting   - whether to plot the t-SNE result [False]
       **kwargs  - arguments to dpcore.dp DTW algorithm    
    outputs:
       Z - 2D projection map of audio files
    """
    Z = []
    kwargs['penalty'] = kwargs.pop('penalty', 0.1)
    kwargs['gutter'] = kwargs.pop('gutter', 0.0)
    for i in xrange(len(V)):
        C = dfun(V[ref_idx]['mfccs'], V[i]['mfccs'])
        p, q, c, phi = dpcore.dp(C, **kwargs)
        if not dfw:
            Z.append(V[i]['lqft'][q])
        else:
            alpha = optimal_vtln(V[i]['lqft'][q], V[ref_idx]['lqft'][p],
                                 'symmetric')
            print "Optimal DFW (VTLN) warp alpha=", alpha
            Vi_warped = vtln(V[i]['lqft'][q], 'symmetric', alpha)
            Z.append(Vi_warped)
    if tsne:
        X = array([zz.flatten() for zz in Z], dtype='f8')
        Z = _tsne(X, perplexity=perplexity, plotting=plotting)
    return Z
Пример #8
0
# <codecell>

# Show the element-wise difference between DC and DP as an image and print
# its largest absolute value. DC and DP are defined outside this excerpt.
# NOTE(review): presumably the outputs of two dp implementations being
# compared - confirm against the earlier cells.
plt.imshow(DC-DP, interpolation='none')
print np.max(np.abs(DC-DP))

# <codecell>

# Same visual comparison for phiC and phiP (also defined outside this excerpt).
plt.imshow(phiC-phiP, interpolation='none')

# <codecell>

# Small random 5x5 matrix used to exercise dpcore.dp.
MM = np.random.rand(5, 5)
pen = 0.2
gut = 0.3
# pen and gut are passed positionally as the 2nd and 3rd arguments.
# NOTE(review): assumed to be dpcore.dp's penalty and gutter - confirm.
p,q,C,phi = dpcore.dp(MM, pen, gut)
print p, q
print MM
print C
# Cross-check: the value of C at the last path point is printed next to the
# sum of MM along the path plus `pen` for every path cell where phi > 0.
print "best cost =", C[p[-1],q[-1]], "=", np.sum(MM[p, q])+pen*(np.sum(phi[p, q]>0))
# Draw the matrix and overlay the path (q on the x axis, p on the y axis).
plt.imshow(MM, interpolation='none', cmap='binary')
# NOTE(review): plt.hold was removed in newer matplotlib releases - confirm
# the matplotlib version this notebook targets.
plt.hold(True)
plt.plot(q,p,'-r')
plt.hold(False)
plt.show()

# <codecell>

# Copy M (defined outside this excerpt) and add random values to two square
# regions; the 20:30 region lies inside the 10:40 region, so it is
# incremented twice.
M2 = np.copy(M)
M2[20:30,20:30] += np.random.rand(10,10)
M2[10:40,10:40] += np.random.rand(30,30)
Пример #9
0
def dtwC(dist_mat, penalty):
    """
    Run dpcore.dp on dist_mat with the given penalty.

    inputs:
      dist_mat - matrix handed to dpcore.dp as its first argument
      penalty  - forwarded to dpcore.dp as the 'penalty' keyword
    outputs:
      the four values returned by dpcore.dp, as a tuple (p, q, C, phi)
    """
    result = dpcore.dp(dist_mat, penalty=penalty)
    # Unpacking keeps the requirement that exactly four values come back.
    path_p, path_q, cost, trace = result
    return path_p, path_q, cost, trace