/
gmm_em.py
176 lines (152 loc) · 4.61 KB
/
gmm_em.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
import numpy as np
import soundfile as sf
import sidekit as skk
import os
import sys
import multiprocessing
import logging
import htkmfc
import time
from scipy import sparse
def gmm_em(dataLista, nmixa, final_nitera, ds_factora, nworkers, gmmFilename):
    """Train a GMM with per-dimension variances by EM and binary splitting.

    Training starts from a single component initialised with the global mean
    and variance of the data. The model is re-estimated with EM, every
    component is split in two, and the cycle repeats until ``nmixa``
    components are reached.

    Args:
        dataLista: Either a path to a text file listing one audio file per
            line, or an already loaded sequence of feature arrays (see
            ``load_data``). Each array is indexed as ``[dimension, frame]``.
        nmixa: Target number of components; must be a power of two.
            Falls back to 256 when it cannot be parsed as a number.
        final_nitera: Number of EM iterations for the final model size.
            Falls back to 10 when it cannot be parsed.
        ds_factora: Frame sub-sampling step used for the early splits.
            Falls back to 1 when it cannot be parsed.
        nworkers: Accepted for interface compatibility; this implementation
            runs the E-step sequentially and does not use it.
        gmmFilename: Accepted for interface compatibility; the model is only
            returned, nothing is written to disk.

    Returns:
        The trained model object exposing ``w``, ``mu`` and ``sigma``.

    Raises:
        ValueError: If ``nmixa`` is not a power of two, if a numeric argument
            is not a positive integer, or if there is no data to train on.
    """
    nmix = _parse_positive_int(nmixa, 256, 'nmix')
    final_niter = _parse_positive_int(final_nitera, 10, 'final_niter')
    ds_factor = _parse_positive_int(ds_factora, 1, 'ds_factor')
    if not is_power2(nmix):
        raise ValueError('Error nmix must be power of 2')

    # Load data
    dataList = load_data(dataLista)
    nfiles = len(dataList)
    if nfiles == 0:
        raise ValueError('dataList is empty; nothing to train on')

    print('\n\n Initializing the GMM hyperparameters ...\n')
    gm, gv = comp_gm_gv(dataList)
    gmm = gmm_class(gm, gv)

    # EM iterations per split level: entry k is used for 2**k components.
    niter = [1, 2, 4, 4, 4, 4, 6, 6, 10, 10, 15]
    final_level = nmix.bit_length() - 1  # == log2(nmix) for a power of two
    while len(niter) <= final_level:
        # Models larger than the table reuse the last scheduled count.
        niter.append(niter[-1])
    niter[final_level] = final_niter

    mix = 1
    level = 0
    while mix <= nmix:
        if mix * 2 >= nmix:
            # Use every frame for the last two model sizes.
            ds_factor = 1
        print('\nRe-estimating the GMM hyperparameters for %d components ...\n'
              % mix)
        for em_iter in range(niter[level]):
            print('EM iter#: %d \t' % em_iter)
            N = 0
            F = 0
            S = 0
            L = 0
            nframes = 0
            start_time = time.time()
            for feats in dataList:
                # Keep every ds_factor-th frame (frames run along axis 1).
                n, f, s, l = expectation(np.asarray(feats)[:, ::ds_factor], gmm)
                N = N + n
                F = F + f
                S = S + s
                L = L + np.sum(l)
                nframes = nframes + len(l)
            elapsed_time = time.time() - start_time
            print('[llk = %.2f] \t [elaps = %.2f s]\n'
                  % (L / nframes, elapsed_time))
            gmm = maximization(N, F, S)
        if mix < nmix:
            gmm = gmm_mixup(gmm)
        mix = mix * 2
        level += 1
    return gmm


def _parse_positive_int(value, default, name):
    """Return *value* as a positive int, or *default* if it is not numeric."""
    try:
        number = float(value)
    except (TypeError, ValueError):
        return default
    if not np.isfinite(number) or number < 1 or number != int(number):
        raise ValueError('%s must be a positive integer, got %r' % (name, value))
    return int(number)
class gmm_class(object):
    """Parameter container for a Gaussian mixture model.

    Attributes are plain public fields that the training routines read and
    overwrite:

    * ``mu``    -- component means
    * ``sigma`` -- component variances
    * ``w``     -- component weights
    """

    def __init__(self, glob_mu, glob_sigma):
        """Create a one-component model from a global mean and variance.

        Args:
            glob_mu: Mean assigned to the single initial component.
            glob_sigma: Variance assigned to the single initial component.
        """
        self.mu = glob_mu
        self.sigma = glob_sigma
        # A single component carries the whole probability mass.
        self.w = 1

    def __repr__(self):
        return '%s(mu=%r, sigma=%r, w=%r)' % (
            type(self).__name__, self.mu, self.sigma, self.w)
def is_power2(num):
    """State whether *num* is a power of two.

    Integral floats such as ``8.0`` are accepted; zero, negative values,
    non-integral values and non-numeric values are reported as ``False``.
    """
    try:
        as_int = int(num)
    except (TypeError, ValueError, OverflowError):
        # Not a number, NaN or infinity.
        return False
    if as_int != num:
        # Non-integral value (e.g. 8.5) or a numeric string.
        return False
    # A power of two has exactly one bit set, so clearing the lowest set bit
    # leaves zero.
    return as_int > 0 and (as_int & (as_int - 1)) == 0
def load_data(datalist):
    """Load the training features into memory.

    Args:
        datalist: Either the path of a text file that lists one audio file
            per line, or an already loaded collection of feature arrays.

    Returns:
        When ``datalist`` is a path: a list with one entry per listed audio
        file, each being the first element returned by ``skk.mfcc`` for that
        file's samples. Otherwise ``datalist`` itself, unchanged.
    """
    try:
        string_types = basestring  # Python 2
    except NameError:
        string_types = str  # Python 3

    if not isinstance(datalist, string_types):
        return datalist

    with open(datalist) as f:
        lines = f.read().splitlines()

    # need htk read here
    features = []
    for path in lines:
        if not path.strip():
            # Blank lines in the list file do not name an audio file.
            continue
        signal, _sample_rate = sf.read(path)
        # NOTE(review): the EM code slices frames along axis 1
        # (``data[:, ...]``); confirm skk.mfcc returns that layout and
        # transpose here if it does not.
        features.append(skk.mfcc(signal)[0])
    return features
def comp_gm_gv(dataList):
    """Compute the global per-dimension mean and variance of the data.

    The statistics are pooled over the frames of every array, so arrays with
    different numbers of frames are supported.

    Args:
        dataList: Sequence of 2-D arrays indexed as ``[dimension, frame]``;
            all arrays must have the same number of dimensions.

    Returns:
        ``(globalmean, gvariance)``, each a column array of shape
        ``(ndim, 1)``. The variance is the population variance (divides by
        the number of frames, like ``np.var``).

    Raises:
        ValueError: If the data contains no frames.
    """
    arrays = [np.asarray(item, dtype=float) for item in dataList]
    nframes = sum(arr.shape[1] for arr in arrays)
    if nframes == 0:
        raise ValueError('cannot compute global statistics of empty data')

    # Accumulate per file instead of concatenating to avoid copying all data.
    globalmean = sum(arr.sum(axis=1, keepdims=True) for arr in arrays) / nframes
    gvariance = sum(
        ((arr - globalmean) ** 2).sum(axis=1, keepdims=True) for arr in arrays
    ) / nframes
    return globalmean, gvariance
def expectation(data, gmm):
    """Compute the sufficient statistics of *data* under *gmm* (E-step).

    Args:
        data: 2-D array indexed as ``[dimension, frame]``.
        gmm: Model object exposing ``mu``, ``sigma`` and ``w``.

    Returns:
        ``(N, F, S, llk)`` where

        * ``N`` -- posterior mass per component, shape ``(nmix,)``
        * ``F`` -- posterior-weighted sum of the frames, ``(ndim, nmix)``
        * ``S`` -- posterior-weighted sum of the squared frames,
          ``(ndim, nmix)``
        * ``llk`` -- the per-frame log-likelihood returned by ``postprob``
    """
    frames = np.asarray(data, dtype=float)
    post, llk = postprob(frames, gmm.mu, gmm.sigma, gmm.w)
    post = np.asarray(post)

    N = post.sum(axis=1)
    # Matrix products: (ndim, nframes) x (nframes, nmix) -> (ndim, nmix).
    F = np.dot(frames, post.T)
    S = np.dot(frames * frames, post.T)
    return N, F, S, llk
def postprob(data, mu, sigma, w):
    """Compute the posterior probability of each component for each frame.

    Args:
        data: 2-D array indexed as ``[dimension, frame]``.
        mu: Component means.
        sigma: Component variances.
        w: Component weights.

    Returns:
        ``(post, llk)``: ``post`` holds the component posteriors with one row
        per component and one column per frame; ``llk`` is the log-likelihood
        of each frame under the whole mixture.
    """
    log_joint = np.asarray(lgmmprob(data, mu, sigma, w))
    # Components run along axis 0, so that is the axis to marginalise.
    llk = np.asarray(logsumexp(log_joint, 0))
    post = np.exp(log_joint - llk)
    return post, llk
def lgmmprob(datat, mu, sigma, w):
    """Compute the weighted log-density of every frame under every component.

    Each component is a Gaussian with independent dimensions (one variance
    per dimension).

    Args:
        datat: 2-D array indexed as ``[dimension, frame]``.
        mu: Component means, shape ``(ndim, nmix)``.
        sigma: Component variances, shape ``(ndim, nmix)``.
        w: Component weights: a scalar for a single component, otherwise
            ``nmix`` values.

    Returns:
        Array of shape ``(nmix, nframes)`` holding
        ``log(w_k) + log N(x_t | mu_k, sigma_k)``.
    """
    data = np.asarray(datat, dtype=float)
    ndim = data.shape[0]
    # Accept scalars / 1-D vectors for the single-component case.
    muu = np.asarray(mu, dtype=float).reshape(ndim, -1)
    sigg = np.asarray(sigma, dtype=float).reshape(ndim, -1)
    log_w = np.log(np.asarray(w, dtype=float)).reshape(-1, 1)

    # Terms of the quadratic form that do not depend on the data.
    C = np.sum(muu * muu / sigg, axis=0) + np.sum(np.log(sigg), axis=0)
    # Data-dependent terms, computed for all components and frames at once.
    D = (np.dot((1.0 / sigg).T, data * data)
         - 2.0 * np.dot((muu / sigg).T, data)
         + ndim * np.log(2.0 * np.pi))
    logprob = -0.5 * (C[:, np.newaxis] + D)
    return logprob + log_w
def logsumexp(x, dim):
    """Compute ``log(sum(exp(x), dim))`` without overflow or underflow.

    Args:
        x: Array of log-domain values.
        dim: NumPy axis along which to reduce.

    Returns:
        Array with axis ``dim`` removed. A slice that contains only ``-inf``
        yields ``-inf`` rather than NaN.
    """
    x = np.asarray(x, dtype=float)
    xmax = np.max(x, axis=dim, keepdims=True)
    # Shifting by a non-finite maximum would produce inf - inf = NaN.
    shift = np.where(np.isfinite(xmax), xmax, 0.0)
    with np.errstate(divide='ignore'):
        y = shift + np.log(np.sum(np.exp(x - shift), axis=dim, keepdims=True))
    return np.squeeze(y, axis=dim)
def maximization(Nr, Fr, Sr):
    """Re-estimate the GMM parameters from accumulated statistics (M-step).

    Args:
        Nr: Posterior mass per component, ``nmix`` values.
        Fr: Posterior-weighted sum of the frames, shape ``(ndim, nmix)``.
        Sr: Posterior-weighted sum of the squared frames, ``(ndim, nmix)``.

    Returns:
        A new ``gmm_class`` instance holding the updated weights, means and
        floored variances.
    """
    N = np.asarray(Nr, dtype=float).ravel()
    F = np.asarray(Fr, dtype=float)
    S = np.asarray(Sr, dtype=float)

    w = N / np.sum(N)
    mu = F / N
    sigma = (S / N) - (mu * mu)
    # Keep variances from collapsing; 0.1 is the floor scaling constant.
    sigma = apply_var_floors(w, sigma, 0.1)

    gmm = gmm_class(mu, sigma)
    gmm.w = w
    return gmm
def apply_var_floors(wr, sigmar, floor_constr):
    """Floor the variances at a fraction of their weighted average.

    For every dimension the floor is ``floor_constr`` times the
    weight-averaged variance of that dimension across components.

    Args:
        wr: Component weights, ``nmix`` values.
        sigmar: Component variances, shape ``(ndim, nmix)``.
        floor_constr: Scaling constant applied to the weighted average.

    Returns:
        A new ``(ndim, nmix)`` array in which every variance is at least the
        floor of its dimension; the input is not modified.
    """
    sigma = np.asarray(sigmar, dtype=float)
    w = np.asarray(wr, dtype=float).ravel()
    # Weighted average over components: (ndim, nmix) x (nmix,) -> (ndim,).
    v_floor = np.dot(sigma, w) * floor_constr
    return np.maximum(sigma, v_floor[:, np.newaxis])
def gmm_mixup(gmm):
    """Double the number of components by splitting each one in two.

    Every component is duplicated. The two copies have their mean moved in
    opposite directions by one standard deviation along the dimension where
    that component's variance is largest; variances are copied and weights
    are halved.

    Args:
        gmm: Model object exposing ``mu`` and ``sigma`` of shape
            ``(ndim, nmix)`` and ``w`` (a scalar or ``nmix`` values).

    Returns:
        The same ``gmm`` object, updated in place to ``2 * nmix`` components.
    """
    mu = np.asarray(gmm.mu, dtype=float)
    sigma = np.asarray(gmm.sigma, dtype=float)
    w = np.asarray(gmm.w, dtype=float).ravel()
    if sigma.ndim < 2:
        # Single component given as a plain vector.
        sigma = sigma.reshape(-1, 1)
    mu = mu.reshape(sigma.shape)

    nmix = sigma.shape[1]
    cols = np.arange(nmix)
    arg_max = sigma.argmax(axis=0)

    # Only the dimension with the largest variance is perturbed.
    eps = np.zeros_like(mu)
    eps[arg_max, cols] = np.sqrt(sigma[arg_max, cols])

    gmm.w = np.concatenate([w, w]) * 0.5
    gmm.mu = np.hstack([mu - eps, mu + eps])
    gmm.sigma = np.hstack([sigma, sigma])
    return gmm