forked from yaringal/CLGP
/
CLGP_theano_model.py
150 lines (135 loc) · 8.17 KB
/
CLGP_theano_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
# To speed Theano up, create ram disk: mount -t tmpfs -o size=512m tmpfs /mnt/ramdisk
# Then use flag THEANO_FLAGS='base_compiledir=/mnt/ramdisk' python script.py
# NOTE: Python 2 code (print statements, cPickle); prepends local Theano checkouts to sys.path.
import sys; sys.path.insert(0, "../Theano"); sys.path.insert(0, "../../Theano")
import theano; import theano.tensor as T; import theano.sandbox.linalg as sT
import numpy as np
import cPickle
print 'Theano version: ' + theano.__version__ + ', base compile dir: ' + theano.config.base_compiledir
theano.config.mode = 'FAST_RUN'
theano.config.optimizer = 'fast_run'
# Re-optimising graphs loaded from the pickle cache would defeat the cache's purpose.
theano.config.reoptimize_unpickled_function = False

# Jitter added to kernel diagonals for numerical stability of inverses/Choleskys.
eps = 1e-4
class kernel:
    """Covariance functions, built as Theano symbolic expressions.

    Each full-matrix method (RBF, LIN) returns K(X1, X2); when X2 is omitted
    it returns K(X1, X1) with `eps` jitter added on the diagonal.  The *nn
    variants give only the diagonal of K(X, X) (plus jitter).
    """

    def RBF(self, sf2, l, X1, X2 = None):
        """Squared-exponential kernel with signal variance sf2 and ARD lengthscales l."""
        symmetric = X2 is None
        A = X1 / l
        B = A if symmetric else X2 / l
        # ||a - b||^2 expanded as |a|^2 + |b|^2 - 2 a.b on the scaled inputs.
        sq_dist = (A**2).sum(1)[:, None] + (B**2).sum(1)[None, :] - 2*A.dot(B.T)
        K = sf2 * T.exp(-sq_dist / 2.0)
        if symmetric:
            K = K + eps * T.eye(X1.shape[0])
        return K

    def RBFnn(self, sf2, l, X):
        """Diagonal of the RBF kernel: constant sf2 (+ jitter), broadcast by the caller."""
        return sf2 + eps

    def LIN(self, sl2, X1, X2 = None):
        """Linear kernel sl2 * (X1 X2^T + 1) with a constant (bias) term."""
        symmetric = X2 is None
        other = X1 if symmetric else X2
        K = sl2 * (X1.dot(other.T) + 1)
        if symmetric:
            K = K + eps * T.eye(X1.shape[0])
        return K

    def LINnn(self, sl2, X):
        """Diagonal of the linear kernel (+ jitter)."""
        return sl2 * (T.sum(X**2, 1) + 1) + eps
class CLGP_model:
    """Categorical Latent Gaussian Process (CLGP) model, implemented in Theano.

    Builds (or loads from a pickle cache on disk) the compiled Theano functions
    for the model's Monte-Carlo ELBO terms (LS, KL_U, KL_X) and their gradients.
    `use_hat` selects an alternative parameterisation of q(U); `linear_model`
    swaps the RBF kernel for a linear one.
    """

    def __init__(self, params, sx2 = 1, linear_model = False, samples = 20, use_hat = False):
        # params: dict of numpy arrays -- keys 'Z', 'm', 'ls', 'lhyp', 'lL',
        # 'mu<modality>' judging from exec_f/update_KmmInv_cache below; exact
        # shapes are not visible here -- TODO confirm against the caller.
        # sx2: prior variance of the latent inputs X.
        # samples: default number of MC samples used by estimate().
        ker, self.samples, self.params, self.KmmInv = kernel(), samples, params, {}
        self.use_hat = use_hat

        # Cache file name encodes the model variant so variants don't collide.
        model_file_name = 'model' + ('_hat' if use_hat else '') + ('_linear' if linear_model else '') + '.save'

        try:
            print 'Trying to load model...'
            with open(model_file_name, 'rb') as file_handle:
                obj = cPickle.load(file_handle)
                self.f, self.g, self.f_Kmm, self.f_KmmInv, self.dKmm_d = obj
                self.update_KmmInv_cache()
                print 'Loaded!'
            return
        # NOTE(review): bare except silently swallows every error (missing file,
        # corrupt pickle, even KeyboardInterrupt) and falls through to a rebuild.
        except:
            print 'Failed. Creating a new model...'

        # Symbolic inputs: data (Y), inducing inputs (Z), variational means /
        # log-std of X (m, ls), variational parameters of U (mu, lL), and the
        # standard-normal noise matrices used for reparameterised sampling.
        Y, Z, m, ls, mu, lL, eps_MK, eps_NQ, eps_NK, KmmInv = T.dmatrices('Y', 'Z', 'm', 'ls', 'mu',
            'lL', 'eps_MK', 'eps_NQ', 'eps_NK', 'KmmInv')
        lhyp = T.dvector('lhyp')  # [log sl2, log sf2, log l_1..l_Q]
        (M, K), N, Q = mu.shape, m.shape[0], Z.shape[1]
        # exp() of the log-parameterised quantities -> positive scales/variances.
        s, sl2, sf2, l = T.exp(ls), T.exp(lhyp[0]), T.exp(lhyp[1]), T.exp(lhyp[2:2+Q])
        # Cholesky factor of q(U)'s covariance: strictly-lower triangle of lL
        # with an exp() diagonal to keep it positive.
        L = T.tril(lL - T.diag(T.diag(lL)) + T.diag(T.exp(T.diag(lL))))

        print 'Setting up cache...'
        Kmm = ker.RBF(sf2, l, Z) if not linear_model else ker.LIN(sl2, Z)
        KmmInv_cache = sT.matrix_inverse(Kmm)
        self.f_Kmm = theano.function([Z, lhyp], Kmm, name='Kmm')
        self.f_KmmInv = theano.function([Z, lhyp], KmmInv_cache, name='KmmInv_cache')
        self.update_KmmInv_cache()
        # Jacobians of Kmm wrt Z and lhyp: KmmInv enters the compiled graphs as
        # a plain input, so its dependence on Z/lhyp is chained in externally.
        self.dKmm_d = {'Z': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), Z), name='dKmm_dZ'),
            'lhyp': theano.function([Z, lhyp], T.jacobian(Kmm.flatten(), lhyp), name='dKmm_dlhyp')}

        print 'Setting up model...'
        if not self.use_hat:
            # Standard parameterisation: mu and L are scaled by sf2^0.5.
            mu_scaled, L_scaled = sf2**0.5 * mu, sf2**0.5 * L
            X = m + s * eps_NQ                    # reparameterised latent inputs
            U = mu_scaled + L_scaled.dot(eps_MK)  # reparameterised inducing outputs
            Kmn = ker.RBF(sf2, l, Z, X) if not linear_model else ker.LIN(sl2, Z, X)
            Knn = ker.RBFnn(sf2, l, X) if not linear_model else ker.LINnn(sl2, X)
            A = KmmInv.dot(Kmn)
            B = Knn - T.sum(Kmn * KmmInv.dot(Kmn), 0)   # per-point predictive variance
            # Sample F | U; clamp B at 1e-16 so the sqrt stays finite.
            F = A.T.dot(U) + T.maximum(B, 1e-16)[:,None]**0.5 * eps_NK
            F = T.concatenate((T.zeros((N,1)), F), axis=1)  # first logit pinned to 0
            S = T.nnet.softmax(F)
            # MC estimate of the log-likelihood term (Y assumed one-hot per row
            # given the Y*S inner product -- TODO confirm).
            LS = T.sum(T.log(T.maximum(T.sum(Y * S, 1), 1e-16)))
            if not linear_model:
                # KL(q(U) || p(U)) for K independent GPs sharing the same Kmm.
                KL_U = -0.5 * (T.sum(KmmInv.T * T.sum(mu_scaled[:,None,:]*mu_scaled[None,:,:], 2))
                    + K * (T.sum(KmmInv.T * L_scaled.dot(L_scaled.T)) - M - 2.0*T.sum(T.log(T.diag(L_scaled)))
                    + 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))))
            else:
                KL_U = 0
            #KL_U = -0.5 * T.sum(T.sum(mu_scaled * KmmInv.dot(mu_scaled), 0) + T.sum(KmmInv * L_scaled.dot(L_scaled.T)) - M
            #    - 2.0*T.sum(T.log(T.diag(L_scaled))) + 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))) if not linear_model else 0
        else:
            # "Hat" parameterisation: note Kmn/Kmm replace A/KmmInv below.
            # mu_scaled, L_scaled = mu / sf2**0.5, L / sf2**0.5
            mu_scaled, L_scaled = mu / sf2, L / sf2
            X = m + s * eps_NQ
            U = mu_scaled + L_scaled.dot(eps_MK)
            Kmn = ker.RBF(sf2, l, Z, X) if not linear_model else ker.LIN(sl2, Z, X)
            Knn = ker.RBFnn(sf2, l, X) if not linear_model else ker.LINnn(sl2, X)
            B = Knn - T.sum(Kmn * KmmInv.dot(Kmn), 0)
            F = Kmn.T.dot(U) + T.maximum(B, 1e-16)[:,None]**0.5 * eps_NK
            F = T.concatenate((T.zeros((N,1)), F), axis=1)
            S = T.nnet.softmax(F)
            LS = T.sum(T.log(T.maximum(T.sum(Y * S, 1), 1e-16)))
            if not linear_model:
                # Same KL structure with Kmm in place of KmmInv and the sign of
                # the log|Kmm| term flipped (consequence of the parameterisation).
                KL_U = -0.5 * (T.sum(Kmm.T * T.sum(mu_scaled[:,None,:]*mu_scaled[None,:,:], 2))
                    + K * (T.sum(Kmm.T * L_scaled.dot(L_scaled.T)) - M - 2.0*T.sum(T.log(T.diag(L_scaled)))
                    - 2.0*T.sum(T.log(T.diag(sT.cholesky(Kmm))))))
            else:
                KL_U = 0

        # KL(q(X) || p(X)) per data point for diagonal Gaussians; KL_X sums it.
        KL_X_all = -0.5 * T.sum((m**2.0 + s**2.0)/sx2 - 1.0 - 2.0*ls + T.log(sx2), 1)
        KL_X = T.sum(KL_X_all)

        print 'Compiling...'
        inputs = {'Y': Y, 'Z': Z, 'm': m, 'ls': ls, 'mu': mu, 'lL': lL, 'lhyp': lhyp, 'KmmInv': KmmInv,
            'eps_MK': eps_MK, 'eps_NQ': eps_NQ, 'eps_NK': eps_NK}
        z = 0.0*sum([T.sum(v) for v in inputs.values()]) # solve a bug with derivative wrt inputs not in the graph
        # One compiled function per output quantity...
        f = zip(['X', 'U', 'S', 'LS', 'KL_U', 'KL_X', 'KL_X_all'], [X, U, S, LS, KL_U, KL_X, KL_X_all])
        self.f = {n: theano.function(inputs.values(), f+z, name=n, on_unused_input='ignore') for n,f in f}
        # ...and one gradient function per (objective term, wrt-variable) pair.
        g = zip(['LS', 'KL_U', 'KL_X'], [LS, KL_U, KL_X])
        wrt = {'Z': Z, 'm': m, 'ls': ls, 'mu': mu, 'lL': lL, 'lhyp': lhyp, 'KmmInv': KmmInv}
        self.g = {vn: {gn: theano.function(inputs.values(), T.grad(gv+z, vv), name='d'+gn+'_d'+vn,
            on_unused_input='ignore') for gn,gv in g} for vn, vv in wrt.iteritems()}

        with open(model_file_name, 'wb') as file_handle:
            print 'Saving model...'
            # Compiled Theano graphs pickle through deep recursion; raise the limit.
            sys.setrecursionlimit(2000)
            cPickle.dump([self.f, self.g, self.f_Kmm, self.f_KmmInv, self.dKmm_d], file_handle, protocol=cPickle.HIGHEST_PROTOCOL)

    def update_KmmInv_cache(self):
        """Recompute the cached numeric Kmm^-1 from the current params['Z'], params['lhyp']."""
        self.KmmInv = self.f_KmmInv(self.params['Z'], self.params['lhyp']).astype(theano.config.floatX)

    def exec_f(self, f, Y = [[[0]]], modality = 0, mask = None):
        """Run compiled function f once, with freshly drawn noise matrices.

        Y is a list of per-modality data matrices; mask (boolean, per point)
        drops masked rows from every per-point input before the call.
        """
        # NOTE(review): mutable default Y=[[[0]]] is shared across calls --
        # harmless only because it is never mutated here.
        inputs, (M, K), (N, Q) = {}, self.params['mu' + str(modality)].shape, self.params['m'].shape
        inputs['Z'], inputs['m'], inputs['ls'] = self.params['Z'], self.params['m'], self.params['ls']
        inputs['mu'], inputs['lL'] = self.params['mu' + str(modality)], self.params['lL'][modality]
        inputs['lhyp'], inputs['KmmInv'] = self.params['lhyp'], self.KmmInv
        # Fresh standard-normal draws for the reparameterisation noise.
        inputs['eps_MK'], inputs['eps_NQ'], inputs['eps_NK'] = np.random.randn(M,K), np.random.randn(N,Q), np.random.randn(N,K)
        inputs['Y'] = Y[modality] if len(Y) > 1 else Y[0]
        if mask is not None:
            inputs['Y'], inputs['m'], inputs['ls'] = inputs['Y'][~mask], inputs['m'][~mask], inputs['ls'][~mask]
            inputs['eps_NQ'], inputs['eps_NK'] = inputs['eps_NQ'][~mask], inputs['eps_NK'][~mask]
        return f(**inputs)

    def estimate(self, f, Y = [[[0]]], modality = 0, mask = None, samples = None):
        """Monte-Carlo mean and std of f over `samples` draws (default self.samples)."""
        # np.random.seed(0)
        f_acc = np.array([self.exec_f(f, Y, modality, mask) for s in xrange(samples if samples != None else self.samples)])
        return np.nanmean(f_acc, 0), np.nanstd(f_acc, 0)

    def ELBO(self, Y, mask = None):
        """Estimate the ELBO = KL_X + sum over modalities of (KL_U + E[LS]).

        Returns (ELBO estimate, std of the estimate); the std aggregates the
        per-modality MC stds of the LS terms in quadrature.
        """
        if mask is None:
            mask = np.zeros((self.params['m'].shape[0], len(Y)), dtype=bool)
        # KL_X over points observed in at least one modality (all-masked rows dropped).
        ELBO, std_sum = self.exec_f(self.f['KL_X'], Y, mask=np.all(mask, 1)), 0
        for modality in xrange(len(Y)):
            LS, std = self.estimate(self.f['LS'], Y, modality, mask[:, modality])
            ELBO += self.exec_f(self.f['KL_U'], Y, modality, mask[:, modality]) + LS
            std_sum += std**2
        return ELBO, std_sum**0.5