-
Notifications
You must be signed in to change notification settings - Fork 0
/
Sparse_Coding.py
185 lines (159 loc) · 7.89 KB
/
Sparse_Coding.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
import os
import re
import theano
import string
import time
import theano.tensor as T
from theano.sandbox.linalg.ops import matrix_inverse
import numpy as np
import numpy.random as random
class sparsity_coefficient:
    """Holds the sparse-code coefficient vector for one training example,
    together with its sparsity penalty and prior probability.

    Parameters:
        num_coefficients -- length of the coefficient vector; used only when
            initial_values is not supplied
        initial_values   -- optional starting coefficient vector; defaults to
            a vector of zeros (the code has always used zeros here, even
            though the old log message claimed a gaussian init)
        sparsity_penalty -- penalty selector:
            0 -> L1 penalty
            1 -> epsilon-L1 penalty
            anything else -> log penalty
        beta             -- penalty weight; defaults to 1
        epsilon          -- smoothing term for the epsilon-L1 penalty;
            defaults to 0
    """

    def __init__(self, num_coefficients=10, initial_values=None,
                 sparsity_penalty=0, beta=None, epsilon=None):
        # Use `is None` rather than `== None`: comparing a numpy array to
        # None with `==` broadcasts element-wise and raises "truth value of
        # an array is ambiguous" when used as an if-condition.
        if initial_values is None:
            print('Initializing sparsity coefficients to zeros')
            self.value = np.zeros(num_coefficients)
        else:
            print('Using user provided initial values')
            self.value = initial_values
        if beta is None:
            print('Initializing beta to 1')
            self.beta = 1
        else:
            print('Using user provided beta value')
            self.beta = beta
        if epsilon is None:
            print('Initializing epsilon to 0')
            self.epsilon = 0
        else:
            print('Using user provided epsilon value')
            self.epsilon = epsilon
        # Compute the sparsity penalty term S(a) for the chosen regularizer.
        if sparsity_penalty == 0:
            print('Using L1 regularization')
            self.sparsity_penalty = np.linalg.norm(self.value, ord=1)
        elif sparsity_penalty == 1:
            print('Using epsilon L1')
            self.sparsity_penalty = (self.value ** 2 + self.epsilon) ** 0.5
        else:
            print('Using log regularization')
            self.sparsity_penalty = np.log(1 + self.value ** 2)
        # Prior probability of the coefficients: P(a) proportional to
        # exp(-beta * S(a)).  T.exp keeps this symbolic for a theano graph.
        self.prior = T.exp(-self.beta * self.sparsity_penalty)
class base_vector:
    """A single dictionary basis vector.

    Parameters:
        vector_size    -- required length of the basis vector
        initial_values -- optional starting vector; must have length
            vector_size.  When omitted, the vector is drawn uniformly from
            [0, 1) and then mean-centered.

    Raises:
        ValueError -- if initial_values is given with the wrong length.
    """

    def __init__(self, vector_size, initial_values=None):
        # `is None` (not `== None`): an ndarray compared to None with `==`
        # yields an element-wise array, which is ambiguous as a condition.
        if initial_values is None:
            print('Using randomly initialized base vectors')
            init_value = random.rand(vector_size)
            # Center the random draw so the basis vector has zero mean.
            init_value = init_value - np.mean(init_value)
            self.value = init_value
        elif len(initial_values) != vector_size:
            raise ValueError('initial values not the same dimension as vector_size')
        else:
            print('Using user provided base vectors')
            self.value = initial_values
class sparsity_model:
#This class holds all the sparsity coefficients and bases
def __init__( self, input, num_bases = 10, sparsity_function = 0, beta_top = None,
epsilon_top = None, learning_rate = 0.1, initial_coefficients = None,
initial_bases = None ):
'''
This initializes the sparsity model and takes:
-a required input which corresponds to the ( n x k ) training data
-a sparsity penalty function
where:
0 -> L1 penalty
1 -> epsilon L1 penalty
2 -> log penalty
-an initial beta value which is initialized to 1 if not provided
-an optional epsilon argument for the epsilon L1 penalty which is
initialized to 0 if not provided
'''
self.x = input
self.num_examples = len( input )
self.vector_dim = len( input[ 0 ] )
self.learning_rate = learning_rate
if initial_coefficients == None:
#Initialize the sparsity coefficients
coefficients = np.asarray( [ sparsity_coefficient( num_coefficients = num_bases,
sparsity_penalty = sparsity_function, beta = beta_top,
epsilon = epsilon_top).value for i in range( self.num_examples ) ] )
self.coefficients = theano.shared( value = coefficients, name = 'coefficients', borrow = True )
else:
#Initialize the sparsity coefficients
coefficients = np.asarray( [ sparsity_coefficient( num_coefficients = num_bases, initial_values = initial_coefficients,
sparsity_penalty = sparsity_function, beta = beta_top,
epsilon = epsilon_top).value for i in range( self.num_examples ) ] )
self.coefficients = theano.shared( value = coefficients, name = 'coefficients', borrow = True )
if initial_bases == None:
#Initialize the dictionary bases
bases = np.asarray( [ base_vector( vector_size = self.vector_dim ).value for i in range( num_bases ) ] )
self.bases = theano.shared( value = bases, name = 'bases', borrow = True )
else:
#Initialize the dictionary bases
bases = np.asarray( [ base_vector( vector_size = self.vector_dim, initial_values = initial_bases ).value for i in range( num_bases ) ] )
self.bases = theano.shared( value = bases, name = 'bases', borrow = True )
self.variance = T.var( self.bases )
if beta_top == None:
self.gamma = 2 * self.variance
else:
self.gamma = 2 * self.variance * beta_top
self.params = [ self.coefficients, self.bases ]
def feature_sign_search( self ):
'''
This function runs the feature_sign_search on the coefficients while
holding the bases clamped.
'''
#Declare effective zero for usefullness
effective_zero = 1e-19
opt_cond = np.inf
'''
theta[ i ] is:
-1 if self.coefficients[ i ] < 0
1 if self.coefficients[ i ] > 0
0 if self.coefficients[ i ] == 0
'''
theta = T.sgn( self.coefficients )
active_set = T.ivector( name = 'active_set' )
#This corresponds to the gram matrix by dotting the basis vectors by it's transpose
gram_matrix = T.dot( self.bases.T, self.bases )
target_correlation = T.dot( self.bases.T, self.x )
cost = -T.sum( ( target_correlation - T.dot( gram_matrix, self.coefficients ) ) ** 2 )
cost_grad = T.grad( cost, self.coefficients )
candidate = T.argmax( cost_grad )
if T.gt( cost_grad[ candidate ], self.gamma ):
print 'Found candidate greater than gamma'
updated_theta = T.set_subtensor( theta[ candidate ], -1 )
active_set = active_set + candidate
if T.lt( cost_grad[ candidate ], ( -1 * self.gamma ) ):
print 'Found candidate less than negative gamma'
updated_theta = T.set_subtensor( theta[ candidate ], 1 )
active_set = active_set + candidate
active_bases = self.bases[ active_set ]
active_coefficients = self.coefficients[ active_set ]
active_theta = updated_theta[ active_set ]
new_coefficients = ( matrix_inverse( T.dot( active_bases.T, active_bases ) ) *
( T.dot( active_bases.T, target_correlation )
- 0.5 * self.gamma * active_theta ) )
sign_changes = 0