/
TP_kernel_approx_source.py
122 lines (104 loc) · 3.37 KB
/
TP_kernel_approx_source.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
####################### REPORT: INFMDI341 LAB -- KERNEL METHODS #############################
# Student: Shuyu Dong 22/06/2015
#############################################################################################
# Authors: Bellet, Gramfort, Salmon
from math import sqrt
import numpy as np
from scipy.sparse.linalg import svds
from scipy.linalg import svd
from sklearn.metrics.pairwise import rbf_kernel
def rank_trunc(gram_mat, k, fast=True):
    """
    k-th order approximation of the Gram Matrix G.

    Parameters
    ----------
    gram_mat : array, shape (n_samples, n_samples)
        the Gram matrix
    k : int
        the order approximation
    fast : bool
        use svd (if False) or svds (if True).

    Return
    ------
    gram_mat_k : array, shape (n_samples, n_samples)
        The rank k Gram matrix.
    u : array, shape (n_samples, k)
        The k leading left singular vectors.
    s : array, shape (k,)
        The k retained singular values.
    """
    if fast:
        # svds computes only k singular triplets (iterative, ARPACK);
        # it returns them in ascending order, which does not matter for
        # the reconstruction u @ diag(s) @ v.
        u, s, v = svds(gram_mat, k)
    else:
        # Full SVD, then keep the k leading triplets.
        # BUG FIX: the truncation must keep the first k *columns* of U and
        # the first k *rows* of V -- the previous U[:k, :k] / V[:k, :k]
        # took the top-left corner, producing a wrong (k, k) result.
        U, S, V = svd(gram_mat)
        s = S[:k]
        u = U[:, :k]
        v = V[:k, :]
    gram_mat_k = (u.dot(np.diag(s))).dot(v)
    return gram_mat_k, u, s
def random_features(X_train, X_test, gamma, c=300, seed=44):
"""Compute random kernel features
Parameters
----------
X_train : array, shape (n_samples1, n_features)
The train samples.
X_test : array, shape (n_samples2, n_features)
The test samples.
gamma : float
The Gaussian kernel parameter
c : int
The number of components
seed : int
The seed for random number generation
Return
------
X_new_train : array, shape (n_samples1, c)
The new train samples.
X_new_test : array, shape (n_samples2, c)
The new test samples.
"""
rng = np.random.RandomState(seed)
# TODO Question 4
[n1,p]=X_train.shape
n2=X_test.shape[0]
# p iid Gaussian vectors
W=sqrt(2.0*gamma)*rng.randn(p,c)
# uniformly distributed phases in [0,2\pi)
b=rng.uniform(0,2*np.pi,size=c)
# x*W ([n1 x c]) + b([,c] constant along dim-0)
X_new_train = sqrt(2.0/c)*np.cos(X_train.dot(W)+np.outer(np.ones(n1),b))
X_new_test = sqrt(2.0/c)*np.cos(X_test.dot(W)+np.outer(np.ones(n2),b))
return X_new_train, X_new_test
def nystrom(X_train, X_test, gamma, c=500, k=200, seed=44):
    """Compute nystrom kernel approximation

    Parameters
    ----------
    X_train : array, shape (n_samples1, n_features)
        The train samples.
    X_test : array, shape (n_samples2, n_features)
        The test samples.
    gamma : float
        The Gaussian kernel parameter
    c : int
        The number of points to sample for the approximation
    k : int
        The number of components
    seed : int
        The seed for random number generation

    Return
    ------
    X_new_train : array, shape (n_samples1, c)
        The new train samples.
    X_new_test : array, shape (n_samples2, c)
        The new test samples.
    """
    rng = np.random.RandomState(seed)
    n_train = X_train.shape[0]
    # Draw c landmark indices (with replacement) from the training set.
    landmark_idx = rng.randint(n_train, size=c)
    landmarks = X_train[landmark_idx]
    # Rank-k truncation of the landmark Gram matrix (fast svds path by default).
    _, u_k, s_k = rank_trunc(rbf_kernel(landmarks, landmarks), k)
    # Mapping matrix U_k @ diag(s_k^{-1/2}); assumes the k retained
    # singular values are nonzero -- TODO confirm for rank-deficient landmarks.
    mapping = u_k.dot(np.diag(np.sqrt(1. / s_k)))
    # Project kernel evaluations against the landmarks into the new space.
    X_new_train = rbf_kernel(X_train, landmarks).dot(mapping)
    X_new_test = rbf_kernel(X_test, landmarks).dot(mapping)
    return X_new_train, X_new_test