forked from argriffing/xgcode
-
Notifications
You must be signed in to change notification settings - Fork 0
/
ctmcmi.py
693 lines (654 loc) · 20.5 KB
/
ctmcmi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
"""
Continuous time Markov chain mutual information.
This module is about reversible finite-state
continuous-time Markov processes and the mutual information
between two points in the process separated by a given amount of time.
Reversibility in this context means that the rate matrix
satisfies the detailed balance equations
and implies that the eigenvalues of the rate matrix are real.
This matrix is also assumed to be irreducible.
Combined with reversibility this means that exactly one
of the eigenvalues of the matrix is zero
while all of the other eigenvalues are negative.
Some approximations of the mutual information
have been implemented for very small and very large time separations.
An alternate formulation of mutual information between these random variables
is the expected log likelihood ratio between the joint distribution
of the separated points in the process and the product of their
marginal distributions.
Note that scipy gives eigenvalues in increasing order,
whereas numpy does not make any guarantees about their order.
"""
import math
import numpy as np
import scipy
from scipy import linalg
import mrate
def sample_distribution(n):
    """
    Sample a random probability distribution.
    @param n: number of states
    @return: a length-n array of nonnegative weights summing to one
    """
    # draw nonnegative weights and normalize them to unit sum
    weights = np.random.rand(n)
    return weights / weights.sum()
def sample_symmetric_rate_matrix(n):
    """
    Sample a random symmetric rate matrix.
    @param n: number of states
    @return: an n x n symmetric matrix whose rows each sum to zero
    """
    # a random nonnegative matrix, symmetrized against its transpose
    A = np.random.rand(n, n)
    sym = A + A.T
    # subtracting each row sum on the diagonal forces zero row sums
    return sym - np.diag(sym.sum(axis=1))
############################################################################
# MUTUAL INFORMATION STUFF
def cute_MI_alternate(R, t):
    """
    This is yet another implementation of a large t approximation of MI.
    It is related to the expectation of the ratio of the probability
    of what you actually saw to the probability of seeing
    what you saw given independence.
    It is half of one less than this expectation.
    It is not as numerically stable as other large t approximations.
    @param R: reversible rate matrix
    @param t: time separating the two observations
    @return: a large-t approximation of the mutual information
    """
    # define the number of states
    n = len(R)
    # define the transition matrix
    P = scipy.linalg.expm(R*t)
    # define the stationary distribution
    # (the unused sqrt of the distribution was removed)
    p = mrate.R_to_distn(R)
    # accumulate the expected joint-to-independent probability ratio
    accum = 0
    for i in range(n):
        for j in range(n):
            p_joint = p[i] * P[i, j]
            p_independent = p[i] * p[j]
            accum += p_joint * (p_joint / p_independent)
    # half of one less than the expected ratio
    return (accum - 1) / 2
def cute_MI_alternate_b(R, t):
    """
    It should closely approximate mutual information when t is not tiny.
    """
    nstates = len(R)
    # transition probabilities after time t
    P = scipy.linalg.expm(R*t)
    # stationary distribution and its elementwise square root
    p = mrate.R_to_distn(R)
    s = np.sqrt(p)
    # accumulate squared deviations in the symmetrized basis
    total = 0
    for i in range(nstates):
        for j in range(nstates):
            joint = p[i] * P[i, j]
            deviation = joint / (s[i] * s[j]) - (s[i] * s[j])
            total += (deviation * deviation) / 2
    return total
def get_mutual_information_stable(R, t):
    """
    This is a more stable function.

    NOTE(review): the docstring of record promised
    (unscaled_result, log_of_scaling_factor) but the code currently
    returns only a single accumulated value; the scaling-factor part
    is apparently unfinished (see the FIXME below).
    @param R: reversible rate matrix
    @param t: time separating the two observations
    @return: the accumulated mutual information value
    """
    #FIXME under construction
    n = len(R)
    # stationary distribution and spectral decomposition
    # of the symmetrized rate matrix
    v = mrate.R_to_distn(R)
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    # reconstruct the transition matrix entrywise from the spectrum
    P = np.zeros_like(R)
    accum = 0
    for i in range(n):
        for j in range(n):
            for k in range(n):
                # ratio converting the symmetric basis back to P
                # (presumably from detailed balance — TODO confirm)
                a = (v[j] / v[i])**0.5
                b = U[i, k] * U[j, k]
                c = math.exp(t * w[k])
                P[i, j] += a * b * c
    # compute the unscaled part of log(X(i,j)/(X(i)*X(j)))
    for i in range(n):
        for j in range(n):
            if v[i] and P[i, j]:
                coeff = v[i] * P[i, j]
                numerator = P[i, j]
                denominator = v[j]
                # the problem is that the following log is nearly zero
                value = coeff * math.log(numerator / denominator)
                accum += np.real(value)
    return accum
def get_mutual_information_approx(R, t):
    """
    This is an approximation for large times.
    It can be rewritten using orthogonality.
    The unused stationary distribution computation was removed.
    @param R: reversible rate matrix
    @param t: large time separating the two observations
    @return: a large-t approximation of the mutual information
    """
    n = len(R)
    # spectral decomposition of the symmetrized rate matrix
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    accum = 0
    for i in range(n):
        for j in range(n):
            # sum over the non-stationary eigenmodes only;
            # eigenvalues are in increasing order so index n-1
            # is the zero eigenvalue
            b = 0
            for k in range(n-1):
                b += U[i,k]*U[j,k]*math.exp(t*w[k])
            accum += (b * b) / 2
    return accum
def get_mutual_information_approx_b(R, t):
    """
    This is an approximation for large times.
    It has been rewritten using orthogonality.
    """
    nstates = len(R)
    distn = mrate.R_to_distn(R)
    sym = mrate.symmetrized(R)
    eigenvalues, eigenvectors = np.linalg.eigh(sym)
    # accumulate squared eigenvector products over non-stationary modes
    total = 0
    for i in range(nstates):
        for j in range(nstates):
            for k in range(nstates - 1):
                weight = (eigenvectors[i, k] * eigenvectors[j, k]) ** 2
                total += weight * math.exp(2 * t * eigenvalues[k]) / 2
    return total
def get_mutual_information_approx_c(R, t):
    """
    This is an approximation for large times.
    It has been rewritten using orthogonality
    and simplified using orthonormality,
    leaving only a sum over the non-stationary eigenvalues.
    The unused stationary distribution computation was removed.
    @param R: reversible rate matrix
    @param t: large time separating the two observations
    @return: a large-t approximation of the mutual information
    """
    n = len(R)
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    accum = 0
    # eigenvalues are in increasing order so index n-1 is the zero mode
    for k in range(n-1):
        accum += math.exp(2*t*w[k])
    return accum / 2
def get_mutual_information_small_approx(R, t):
    """
    This is an approximation for small times.
    The commented-out debugging prints were removed.
    @param R: reversible rate matrix
    @param t: small time separating the two observations
    @return: a small-t approximation of the mutual information
    """
    n = len(R)
    v = mrate.R_to_distn(R)
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    accum = 0
    for i in range(n):
        # spectral expansion of the i-th diagonal entry
        a = 0
        for k in range(n):
            a += (U[i, k]**2) * math.exp(t * w[k])
        accum += v[i] * a * math.log(a / v[i])
    return accum
def get_mutual_information_small_approx_b(R, t):
    """
    This is an approximation for small times.
    Check a decomposition.
    """
    nstates = len(R)
    distn = mrate.R_to_distn(R)
    sym = mrate.symmetrized(R)
    eigenvalues, eigenvectors = np.linalg.eigh(sym)
    # four running sums, one per decomposition term
    part = [0, 0, 0, 0]
    for i in range(nstates):
        # spectral sums over all modes and over non-stationary modes
        full_sum = 0
        partial_sum = 0
        for k in range(nstates):
            weight = eigenvectors[i, k] * eigenvectors[i, k]
            full_sum += weight * math.exp(t * eigenvalues[k])
        for k in range(nstates - 1):
            weight = eigenvectors[i, k] * eigenvectors[i, k]
            partial_sum += weight * math.exp(t * eigenvalues[k])
        xa = distn[i] * distn[i]
        xb = distn[i] * partial_sum
        ya = math.log(full_sum)
        yb = -math.log(distn[i])
        part[0] += xa * ya
        part[1] += xa * yb
        part[2] += xb * ya
        part[3] += xb * yb
    return part[0] + part[1] + part[2] + part[3]
def get_mutual_information_small_approx_c(R, t):
    """
    This is an approximation for small times.
    This is an even more aggressive approximation.
    """
    nstates = len(R)
    distn = mrate.R_to_distn(R)
    sym = mrate.symmetrized(R)
    eigenvalues, eigenvectors = np.linalg.eigh(sym)
    total = 0
    for i in range(nstates):
        # spectral expansion of the i-th diagonal entry
        decay = 0
        for k in range(nstates):
            weight = eigenvectors[i, k] * eigenvectors[i, k]
            decay += weight * math.exp(t * eigenvalues[k])
        total += -distn[i] * math.log(distn[i]) * decay
    return total
def get_mutual_information_small_approx_d(R, t):
    """
    This is an approximation for small times.
    This uses all of the off-diagonal entries of the mutual information
    and also uses an approximation of the off-diagonal entries.
    Prints each of the eight decomposition terms as a side effect.
    @param R: reversible rate matrix
    @param t: small time separating the two observations
    @return: the sum of the eight decomposition terms
    """
    n = len(R)
    v = mrate.R_to_distn(R)
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    # accumulate the four diagonal (i == j) decomposition terms
    accum_diag_a = 0
    accum_diag_b = 0
    accum_diag_c = 0
    accum_diag_d = 0
    for i in range(n):
        a = 0
        b = 0
        for k in range(n):
            prefix = U[i, k] * U[i, k]
            a += prefix * math.exp(t * w[k])
        for k in range(n-1):
            prefix = U[i, k] * U[i, k]
            b += prefix * math.exp(t * w[k])
        x1 = v[i] * v[i]
        x2 = v[i] * b
        y1 = math.log(a)
        y2 = -math.log(v[i])
        accum_diag_a += x1 * y1
        accum_diag_b += x1 * y2
        accum_diag_c += x2 * y1
        accum_diag_d += x2 * y2
    # accumulate the four off-diagonal (i != j) decomposition terms
    accum_a = 0
    accum_b = 0
    accum_c = 0
    accum_d = 0
    for i in range(n):
        for j in range(n):
            if i != j:
                prefix = (v[i] * v[j]) ** .5
                a = 0
                for k in range(n):
                    a += U[i, k] * U[j, k] * math.exp(t * w[k])
                b = 0
                for k in range(n-1):
                    b += U[i, k] * U[j, k] * math.exp(t * w[k])
                x1 = v[i] * v[j]
                x2 = prefix * b
                y1 = math.log(a)
                y2 = -math.log(prefix)
                accum_a += x1 * y1
                accum_b += x1 * y2
                accum_c += x2 * y1
                accum_d += x2 * y2
    terms = [
            accum_diag_a, accum_diag_b, accum_diag_c, accum_diag_d,
            accum_a, accum_b, accum_c, accum_d]
    for term in terms:
        # parenthesized print works under both Python 2 and Python 3;
        # the original bare print statement was Python 2 only
        print(term)
    return sum(terms)
def get_mutual_information_diff_approx(R, t):
    """
    This is an approximation for large times.
    It can be rewritten using orthogonality.
    The unused stationary distribution computation was removed.
    @param R: reversible rate matrix
    @param t: large time separating the two observations
    @return: a large-t approximation of the mutual information derivative
    """
    n = len(R)
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    accum = 0
    for i in range(n):
        for j in range(n):
            # spectral sum over the non-stationary modes
            b = 0
            for k in range(n-1):
                b += U[i,k]*U[j,k]*math.exp(t*w[k])
            # and its time derivative
            c = 0
            for k in range(n-1):
                c += U[i,k]*U[j,k]*w[k]*math.exp(t*w[k])
            accum += b * c
    return accum
def get_mutual_information_diff_approx_b(R, t):
    """
    This is an approximation for large times.
    It has been rewritten using orthogonality.
    """
    nstates = len(R)
    distn = mrate.R_to_distn(R)
    sym = mrate.symmetrized(R)
    eigenvalues, eigenvectors = np.linalg.eigh(sym)
    total = 0
    for i in range(nstates):
        for j in range(nstates):
            # sum over the non-stationary eigenmodes only
            for k in range(nstates - 1):
                weight = (eigenvectors[i, k] * eigenvectors[j, k]) ** 2
                total += weight * eigenvalues[k] * math.exp(
                        2 * t * eigenvalues[k])
    return total
def get_mutual_information_diff_approx_c(R, t):
    """
    This is an approximation for large times.
    It has been rewritten using orthogonality
    and simplified using orthonormality.
    The unused stationary distribution computation was removed.
    @param R: reversible rate matrix
    @param t: large time separating the two observations
    @return: a large-t approximation of the mutual information derivative
    """
    n = len(R)
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    accum = 0
    # eigenvalues are in increasing order so index n-1 is the zero mode
    for k in range(n-1):
        accum += w[k]*math.exp(2*t*w[k])
    return accum
def get_mutual_information_b(R, t):
    """
    This uses some cancellation.

    The mutual information is split into diagonal (i == j) and
    off-diagonal (i != j) sums, each decomposed into products of
    small factors; compare get_mutual_information_small_approx_d,
    which keeps four terms per group — here the fourth term
    (x2 * y2) of each group is omitted (presumably because it
    cancels structurally — TODO confirm).
    @param R: reversible rate matrix
    @param t: time separating the two observations
    """
    n = len(R)
    # stationary distribution and spectral decomposition
    # of the symmetrized rate matrix
    v = mrate.R_to_distn(R)
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    # diagonal contributions
    accum_diag_a = 0
    accum_diag_b = 0
    accum_diag_c = 0
    for i in range(n):
        # a: spectral sum over all modes;
        # b: spectral sum over the non-stationary modes only
        a = 0
        b = 0
        for k in range(n):
            prefix = U[i, k] * U[i, k]
            a += prefix * math.exp(t * w[k])
        for k in range(n-1):
            prefix = U[i, k] * U[i, k]
            b += prefix * math.exp(t * w[k])
        x1 = v[i] * v[i]
        x2 = v[i] * b
        y1 = math.log(a)
        y2 = -math.log(v[i])
        accum_diag_a += x1 * y1
        accum_diag_b += x1 * y2
        accum_diag_c += x2 * y1
    # off-diagonal contributions
    accum_a = 0
    accum_b = 0
    accum_c = 0
    for i in range(n):
        for j in range(n):
            if i != j:
                prefix = (v[i] * v[j]) ** .5
                a = 0
                for k in range(n):
                    a += U[i, k] * U[j, k] * math.exp(t * w[k])
                b = 0
                for k in range(n-1):
                    b += U[i, k] * U[j, k] * math.exp(t * w[k])
                x1 = v[i] * v[j]
                x2 = prefix * b
                y1 = math.log(a)
                y2 = -math.log(prefix)
                accum_a += x1 * y1
                accum_b += x1 * y2
                accum_c += x2 * y1
    terms = [
            accum_diag_a, accum_diag_b, accum_diag_c,
            accum_a, accum_b, accum_c]
    return sum(terms)
def get_mutual_info_known_distn_fast(R, p, t):
    """
    Compute the mutual information with vectorized numpy operations.
    The unused state-count local was removed.
    @param R: rate matrix
    @param p: stationary distribution
    @param t: time
    @return: mutual information
    """
    P = scipy.linalg.expm(R*t)
    # (P.T * p).T scales row i of P by p[i], giving the joint
    # distribution; the log factor is the pointwise log likelihood ratio
    return np.sum((P.T * p).T * np.log(P/p))
def get_mutual_info_known_distn(R, v, t):
    """
    Mutual information when the stationary distribution is known.
    This defers to the expected log likelihood ratio,
    which is the same quantity.
    @param R: rate matrix
    @param v: stationary distribution
    @param t: time separating the two observations
    """
    return get_expected_ll_ratio_known_distn(R, v, t)
def get_mutual_information(R, t):
    """
    Get the mutual information between two observations.
    The two observations are of a
    reversible finite-state continuous-time Markov process
    and are separated by time t.
    This defers to the expected log likelihood ratio,
    which is the same quantity.
    @param R: rate matrix
    @param t: the amount of time separating the two observations
    """
    return get_expected_ll_ratio(R, t)
def get_mutual_information_diff(R, t):
    """
    Time derivative of the mutual information.
    @param R: rate matrix
    @param t: time separating the two observations
    """
    nstates = len(R)
    # the transition matrix and its derivative with respect to time
    P = scipy.linalg.expm(R*t)
    P_dt = mrate.expm_diff_spectral(R, t)
    # the stationary distribution
    distn = mrate.R_to_distn(R)
    # differentiate the expected log likelihood ratio by parts
    total = 0
    for i in range(nstates):
        for j in range(nstates):
            if distn[i] and P[i, j]:
                lead = distn[i] * P_dt[i, j]
                trail = 1 + math.log(P[i, j]) - math.log(distn[j])
                total += lead * trail
    return total
def get_mutual_information_diff_b(R, t):
    """
    This is a more symmetrized version.
    Note that two of the three terms in the full expansion
    are probably structurally zero, so only the surviving
    B * G' * log(G) term is computed; the dead computations of the
    other two terms, of the transition matrix, and of the stationary
    distribution were removed along with commented-out prints.
    @param R: reversible rate matrix
    @param t: time separating the two observations
    """
    n = len(R)
    # spectral summaries of the symmetrized rate matrix
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    # entrywise spectral reconstruction and its time derivative
    G = np.zeros_like(R)
    for i in range(n):
        for j in range(n):
            G[i, j] = 0
            for k in range(n):
                G[i, j] += U[i, k] * U[j, k] * math.exp(t * w[k])
    G_diff = np.zeros_like(R)
    for i in range(n):
        for j in range(n):
            G_diff[i, j] = 0
            for k in range(n):
                G_diff[i, j] += U[i, k] * U[j, k] * w[k] * math.exp(t * w[k])
    # outer product of the stationary eigenvector with itself
    B = np.outer(U.T[-1], U.T[-1])
    return np.sum(B * G_diff * np.log(G))
def get_mutual_information_diff_c(R, t):
    """
    This is a more symmetrized version.
    Some structurally zero terms have been removed.
    The unused (and expensive) transition matrix computation and the
    unused stationary distribution computation were also removed;
    they did not affect the result.
    @param R: reversible rate matrix
    @param t: time separating the two observations
    """
    n = len(R)
    # spectral summaries of the symmetrized rate matrix
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    # outer product of the stationary eigenvector with itself
    B = np.outer(U.T[-1], U.T[-1])
    # entrywise spectral reconstruction and its time derivative
    G = np.zeros_like(R)
    for i in range(n):
        for j in range(n):
            G[i, j] = 0
            for k in range(n):
                G[i, j] += U[i, k] * U[j, k] * math.exp(t * w[k])
    G_diff = np.zeros_like(R)
    for i in range(n):
        for j in range(n):
            G_diff[i, j] = 0
            for k in range(n):
                G_diff[i, j] += U[i, k] * U[j, k] * w[k] * math.exp(t * w[k])
    return np.sum(B * G_diff * np.log(G))
def get_mutual_information_diff_zero(R):
    """
    Derivative of mutual information at time zero.
    Haha apparently this does not exist.
    Prints the intermediate matrices as a side effect.
    @param R: reversible rate matrix
    """
    # get non-spectral summaries
    n = len(R)
    # get spectral summaries
    S = mrate.symmetrized(R)
    w, U = np.linalg.eigh(S)
    # outer product of the stationary eigenvector with itself
    B = np.outer(U.T[-1], U.T[-1])
    # entrywise spectral reconstruction at t=0 and its time derivative
    G = np.zeros_like(R)
    for i in range(n):
        for j in range(n):
            G[i, j] = 0
            for k in range(n):
                G[i, j] += U[i, k] * U[j, k]
    G_diff = np.zeros_like(R)
    for i in range(n):
        for j in range(n):
            G_diff[i, j] = 0
            for k in range(n):
                G_diff[i, j] += U[i, k] * U[j, k] * w[k]
    # parenthesized print works under both Python 2 and Python 3;
    # the original bare print statements were Python 2 only
    print(G)
    print(G_diff)
    print(B)
    return np.sum(B * G_diff * np.log(G))
def get_expected_ll_ratio_known_distn(R, v, t):
    """
    Expected log likelihood ratio between the joint distribution and
    the product of marginals, given a known stationary distribution.
    This equals the mutual information.
    @param R: rate matrix
    @param v: stationary distribution
    @param t: time separating the two observations
    """
    #FIXME redundant function
    nstates = len(R)
    # transition probabilities after time t
    P = scipy.linalg.expm(R*t)
    # the stationary distribution is supplied by the caller
    p = v
    total = 0
    for i in range(nstates):
        for j in range(nstates):
            if p[i] and P[i, j]:
                coeff = p[i] * P[i, j]
                # the p[i] factor cancels between the joint numerator
                # p[i]*P[i,j] and the independent denominator p[i]*p[j]
                contrib = coeff * math.log(P[i, j] / p[j])
                if not np.allclose(np.imag(contrib), 0):
                    raise ValueError('rogue imaginary number')
                total += np.real(contrib)
    return total
def get_expected_ll_ratio(R, t):
    """
    This is also the mutual information.
    It is the mutual information between two observations
    of a finite-state continuous-time Markov process at equilibrium
    where the observations are separated by time t.
    """
    #FIXME redundant function
    nstates = len(R)
    # transition probabilities after time t
    P = scipy.linalg.expm(R*t)
    # the stationary distribution
    p = mrate.R_to_distn(R)
    total = 0
    for i in range(nstates):
        for j in range(nstates):
            if p[i] and P[i, j]:
                coeff = p[i] * P[i, j]
                # the p[i] factor cancels between the joint numerator
                # p[i]*P[i,j] and the independent denominator p[i]*p[j]
                contrib = coeff * math.log(P[i, j] / p[j])
                if not np.allclose(np.imag(contrib), 0):
                    raise ValueError('rogue imaginary number')
                total += np.real(contrib)
    return total
def get_pinsker_lower_bound_mi(R, t):
    """
    Pinsker lower bound on the mutual information.
    Mutual information is the KL divergence between the joint
    distribution at time t and the product of marginals, so by
    Pinsker's inequality it is at least half the squared L1 distance
    between those two joint distributions.
    The unused state-count local was removed.
    @param R: rate matrix
    @param t: time separating the two observations
    @return: a lower bound on the mutual information
    """
    # define the transition matrix
    P = scipy.linalg.expm(R*t)
    # define the stationary distribution
    p = mrate.R_to_distn(R)
    # define the joint probability matrix at times t and infinity
    J_t = np.dot(np.diag(p), P)
    J_inf = np.outer(p, p)
    # half the squared L1 distance between the joint distributions
    return 0.5 * np.sum(abs(J_t - J_inf))**2
def get_row_based_plb_mi(R, t):
    """
    Row based pinsker lower bound of mutual information.
    Each row of the transition matrix is a conditional distribution;
    Pinsker's inequality bounds its KL divergence from the stationary
    distribution by half the squared L1 distance, and the bounds are
    averaged over the stationary distribution.
    The unused state-count and joint-matrix locals were removed.
    @param R: rate matrix
    @param t: time separating the two observations
    @return: a lower bound on the mutual information
    """
    # define the transition matrix
    P = scipy.linalg.expm(R*t)
    # define the stationary distribution
    p = mrate.R_to_distn(R)
    # average the per-row Pinsker bounds over the stationary weights
    return sum(x * 0.5 * np.sum(abs(row - p))**2 for x, row in zip(p, P))
def get_row_based_hellinger_lb_mi(R, t):
    """
    Row based hellinger lower bound of mutual information.
    Each row of the transition matrix is a conditional distribution;
    its squared Hellinger-type distance from the stationary
    distribution is averaged over the stationary weights.
    The unused state-count and joint-matrix locals were removed.
    @param R: rate matrix
    @param t: time separating the two observations
    @return: a lower bound on the mutual information
    """
    # define the transition matrix
    P = scipy.linalg.expm(R*t)
    # define the stationary distribution
    p = mrate.R_to_distn(R)
    # average the per-row squared Hellinger-type distances
    return sum(x*np.sum((np.sqrt(row)-np.sqrt(p))**2) for x, row in zip(p, P))
def get_ll_ratio_wrong(R, t):
    """
    In this function I try to reconstruct a buggy result in an email I got.
    I think that the person who sent the email was trying to
    compute the mutual information but they did something wrong,
    so I want to see if I can figure out exactly what they were doing.
    The wrong computation is reproduced on purpose; do not fix it.
    """
    nstates = len(R)
    # transition probabilities after time t
    P = scipy.linalg.expm(R*t)
    # the stationary distribution
    p = mrate.R_to_distn(R)
    # expected likelihood under the time-t joint distribution
    like_t = 0
    for i in range(nstates):
        for j in range(nstates):
            if p[i] and P[i, j]:
                coeff = p[i] * P[i, j]
                like_t += coeff * p[i] * P[i, j]
    # expected likelihood of independence,
    # still weighted by the time-t joint distribution
    like_inf = 0
    for i in range(nstates):
        for j in range(nstates):
            if p[i] and P[i, j]:
                coeff = p[i] * P[i, j]
                like_inf += coeff * p[i] * p[j]
    # difference of the log expected likelihoods
    return math.log(like_t) - math.log(like_inf)
def get_mi_decomposed(U, W, t):
    """
    Get the mutual information at a given time using the decomposition.
    Q = diag(p)^-(1/2) U W U' diag(p)^(1/2)
    The stationary distribution is p.
    Also sqrt(p) is the column of U corresponding to eigenvalue 0.

    NOTE(review): unimplemented stub — always returns None.
    @param U: orthogonal matrix of the decomposition above
    @param W: presumably the eigenvalue factor of the
        decomposition above — TODO confirm when implemented
    @param t: time separating the two observations
    """
    # TODO implement
    pass