This repository has been archived by the owner on Mar 25, 2023. It is now read-only.
/
hsci_sparsification.py
128 lines (105 loc) · 4.84 KB
/
hsci_sparsification.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
import numpy as np
import utils
def HSCI(x, y, k, l):
    """ Compute the (unbiased) Hilbert-Schmidt independence criterion of x and y,
    with k the kernel of the input space and l the kernel of the output space.

    x : (m, d) array of input samples (one row per sample).
    y : (m, p) array of output samples, same number of rows as x.
    k, l : kernel functions taking two sample row-vectors and returning a scalar.
    Returns the HSIC_1 estimate as a float.  Requires m > 3 (the estimator
    divides by m-2 and m-3).
    """
    m = x.shape[0]
    assert(y.shape[0] == m)
    # Gram matrices with a zeroed diagonal, as required by the unbiased estimator.
    K = np.array([[k(x[i, :], x[j, :]) if i != j else 0. for i in range(m)] for j in range(m)])
    L = np.array([[l(y[i, :], y[j, :]) if i != j else 0. for i in range(m)] for j in range(m)])
    KL = np.dot(K, L)
    # HSIC_1 = [tr(KL) + 1'K1 1'L1/((m-1)(m-2)) - 2 1'KL1/(m-2)] / (m(m-3)).
    # 1'K1 == K.sum() and 1'KL1 == KL.sum(), which replaces the former bare
    # `reduce` chain (Python-2-only builtin) and runs on Python 3 as well.
    hsic = KL.trace() + K.sum() * L.sum() / (m - 1) / (m - 2) - 2. * KL.sum() / (m - 2)
    return float(hsic) / (m - 3) / m
def pseudo_random_sparsify(learn_cfs, test_cfs, descriptizers,
                           max_iter=150, prob=0.1, limit=250):
    """ Naive sparsification with the Hilbert-Schmidt independence criterion.

    Each round draws `max_iter` random Bernoulli(prob) sub-selections of the
    current selection, keeps the one with the highest HSIC score, and
    intersects it with the current selection, shrinking it until at most
    `limit` configurations remain.

    learn_cfs / test_cfs : learning / test configurations (project types,
        forwarded to utils.preprocess_data and utils.GAP_predict).
    descriptizers : descriptor builders forwarded to utils helpers.
    max_iter : random draws per shrinking round.
    prob : probability of keeping each sample in a draw.
    limit : stop once the selection holds at most this many samples.
    Returns a dict mapping round index -> {'size_db': ..., 'mse': ...}.
    """
    spars_info = {}
    kernel = lambda xi, xj: np.exp(-np.dot(xj - xi, xj - xi))
    iterations = 0
    desc, lbl, t_desc, t_lables, descriptors_scaler, labels_scaler = \
        utils.preprocess_data(learn_cfs, test_cfs, descriptizers)
    # Size the masks from the actual data instead of the former hard-coded 2000.
    n = desc.shape[0]
    selection = np.ones(n, dtype=bool)
    subselection = np.ones(n, dtype=bool)
    while selection.sum() > limit:
        hs = 0.0
        for _ in range(max_iter):
            b = np.logical_and(np.random.binomial(1, prob, n), selection)
            var = HSCI(desc[b, :], lbl[b], kernel, kernel)
            print(var)
            if hs < var:
                hs = var
                subselection = b
        # NOTE(review): if no draw scores > 0, `subselection` keeps its previous
        # value and the selection may not shrink this round — confirm intended.
        selection = np.logical_and(selection, subselection)
        lcfs = list(learn_cfs[i] for i in selection.nonzero()[0])
        mse = utils.GAP_predict(lcfs, test_cfs, descriptizers,
                                log=utils.empty_printer)[0]['diff_mse']
        print('Iterations %d : Taille de la selection : %d ; HSCI = %e ; mse = %e' % (iterations, selection.sum(), hs, mse))
        spars_info[iterations] = {
            'size_db': selection.sum(),
            'mse': mse
        }
        iterations += 1
    return spars_info
def sparsifyFOHSIC(learn_cfs, test_cfs, descriptizers, limit=250,
                   max_iter=(2400, 1200, 600, 300, 200, 100, 100, 100, 100,
                             80, 80, 60, 50, 40, 40)):
    """ Naive sparsification with the Hilbert-Schmidt independence criterion,
    forward-selection flavour: starts from an empty selection and grows it.

    Each round draws random Bernoulli(0.01) additions to the current
    selection, keeps the candidate with the highest HSIC score, and adopts it,
    growing the selection until it holds at least `limit` configurations.

    learn_cfs / test_cfs : learning / test configurations (project types,
        forwarded to utils.preprocess_data and utils.GAP_predict).
    descriptizers : descriptor builders forwarded to utils helpers.
    limit : stop once the selection holds at least this many samples.
    max_iter : per-round schedule of random-draw counts; rounds beyond the
        schedule use 30 draws.  (Default is a tuple rather than a list to
        avoid the mutable-default-argument pitfall; callers may still pass a
        list.)
    Returns a dict mapping round index -> {'size_db': ..., 'mse': ...}.
    """
    spars_info = {}
    kernel = lambda xi, xj: np.exp(-np.dot(xj - xi, xj - xi))
    iterations = 0
    desc, lbl, t_desc, t_lables, descriptors_scaler, labels_scaler = \
        utils.preprocess_data(learn_cfs, test_cfs, descriptizers)
    # Size the masks from the actual data instead of the former hard-coded 2000.
    n = desc.shape[0]
    selection = np.zeros(n, dtype=bool)
    subselection = np.zeros(n, dtype=bool)
    while selection.sum() < limit:
        hs = 0.0
        # Fewer draws as the selection grows; fall back to 30 once the
        # schedule is exhausted.
        if iterations >= len(max_iter):
            repet = 30
        else:
            repet = max_iter[iterations]
        for _ in range(repet):
            b = np.logical_or(np.random.binomial(1, 0.01, n), selection)
            var = HSCI(desc[b, :], lbl[b], kernel, kernel)
            print(var)
            if hs < var:
                hs = var
                subselection = b
        selection = subselection
        lcfs = list(learn_cfs[i] for i in selection.nonzero()[0])
        mse = utils.GAP_predict(lcfs, test_cfs, descriptizers,
                                log=utils.empty_printer)[0]['diff_mse']
        print('Iterations %d : Taille de la selection : %d ; HSIC = %e ; mse = %e' % (iterations, selection.sum(), hs, mse))
        spars_info[iterations] = {
            'size_db': selection.sum(),
            'mse': mse
        }
        iterations += 1
    return spars_info
if __name__ == '__main__':
    # Smoke test of HSCI on degenerate constant data; with a Gaussian kernel
    # every off-diagonal entry is 1 and the estimator cancels to 0.
    x = np.ones((200, 8))
    y = np.zeros((200, 1))
    k = lambda xi, xj: np.exp(-np.dot(xj - xi, xj - xi))
    # print() call instead of the former py2-only print statement.
    print(HSCI(x, y, k, k))