/
c_pdf_estimate.py
executable file
·270 lines (225 loc) · 6.82 KB
/
c_pdf_estimate.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
#! /usr/bin/env python
import sys, getopt, math, datetime, os
from math import sqrt, sin
from numpy import *
from pylab import plot,bar,show,legend,title,xlabel,ylabel,axis
from cvxopt.base import *
from cvxopt.blas import dot
from cvxopt.solvers import qp
from cvxopt import solvers
# Switch off the cvxopt solvers' 'show_progress' option for the whole module
# (per the cvxopt documentation this silences the solver's iteration log).
solvers.options['show_progress'] = False
from santa_fe import getData
# Names of the module-level functions that may be invoked from the command
# line; process() and main() only dispatch to names listed here.
_Functions = ['run']
def sign(x, y):
    """Step function theta(x - y): return 1 when x exceeds y, else 0.

    x -- a scalar, or a sequence/array of coordinates
    y -- the value (or array of values) x is compared against

    For a scalar x the result is int(x > y).  For a vector x the result is 1
    only when *every* coordinate of x is greater than the matching coordinate
    of y.

    The scalar case used to test ``x > 0`` and ignored ``y`` altogether, which
    disagreed with the vector case and with the theta(x - x_i) term it is used
    for in estimate.xy().
    """
    try:
        dims = len(x)
    except TypeError:
        # x has no length (plain number, numpy scalar or 0-d array).
        return int(x > y)
    return int(sum(x > y) == dims)
class estimate:
    """Empirical distribution estimate tied to a kernel object.

    Attributes set by the constructor:
      l      -- number of training pairs
      x, y   -- training inputs and outputs (same length)
      kernel -- kernel object; this class reads its ``xx``, ``x``, ``y``,
                ``l`` and ``n`` attributes and calls its ``int`` and ``_calc``
                methods
      beta   -- weight vector; None until the caller assigns it
    """

    def __init__(self, x, y, kernel):
        """Store the training data and the kernel.

        Raises ValueError when x and y differ in length.  ValueError is a
        subclass of StandardError on Python 2, so handlers written for the
        previous StandardError still catch it.
        """
        if len(x) != len(y):
            raise ValueError('input/output values have different cardinality')
        self.l = len(x)
        self.x = x
        self.y = y
        self.kernel = kernel
        self.beta = None

    def xy(self, i, j):
        """Return the empirical joint distribution value at the point (i, j).

            F_l(x, y) = \\frac{1}{l} \\sum_{k=1}^{l} \\theta(x - x_k) \\theta(y - y_k)

        i is compared against the training inputs self.x and j against the
        training outputs self.y; both are data points (scalars or vectors),
        not indices into the training set.
        """
        hits = array([sign(i, self.x[k]) * sign(j, self.y[k])
                      for k in range(self.l)])
        # float() forces true division: on Python 2 the integer count divided
        # by the integer l floors to 0 for every partial count.
        return sum(hits) / float(self.l)

    def r(self, x):
        """Return the beta-weighted kernel sum over the kernel's training pairs.

        Accumulates kernel.y[i] * beta[i] * K(x, kernel.x[i]) into a vector of
        length kernel.n.  self.beta must have been assigned beforehand.
        """
        total = zeros(self.kernel.n)
        for i in range(self.kernel.l):
            total += self.kernel.y[i] * self.beta[i] * self.kernel._calc(x, self.kernel.x[i])
        return total

    def equality_check(self):
        """Return sum_{i,j} beta[j] * kernel.xx[i, j] / l for the current beta."""
        c_matrix = matrix(0.0, (self.l, self.l))
        for i in range(self.l):
            for j in range(self.l):
                c_matrix[i, j] = self.beta[j] * self.kernel.xx[i, j] / self.l
        return sum(c_matrix)

    def inequality_check(self):
        """Return an l x 1 matrix with one summed residual per training point.

        Entry p is the sum over (i, j) of
            beta[i] * (xx[j, i] * sign(x[p], x[j]) * kernel.int(p, i)
                       - xy(x[p], y[p])) / l
        """
        c_matrix = matrix(0.0, (self.l, 1))
        for p in range(self.l):
            # Neither term below depends on the inner indices, so evaluate
            # them once per p (and once per (p, i)) instead of l*l times.
            f_p = self.xy(self.x[p], self.y[p])
            p_matrix = matrix(0.0, (self.l, self.l))
            for i in range(self.l):
                integral = self.kernel.int(p, i)
                for j in range(self.l):
                    p_matrix[i, j] = self.beta[i] * (
                        self.kernel.xx[j, i] * sign(self.x[p], self.x[j]) * integral
                        - f_p) / self.l
            c_matrix[p, 0] = sum(p_matrix)
        return c_matrix
class kernel:
    """Kernel matrices for the consecutive pairs (data[t], data[t+1]).

    Attributes set by the constructor:
      l     -- number of pairs, len(data) - 1
      n     -- dimensionality of one sample (1 when samples have no length)
      x, y  -- data[:-1] and data[1:]
      gamma -- kernel width used by _calc()
      sigma -- the sigma_q argument
      xx    -- l x l kernel matrix of x, scaled so its entries sum to l
      yy    -- l x l kernel matrix of y, scaled the same way
      intg  -- l x l matrix of the values returned by int()

    Constructing an instance also writes xx, yy and intg to the files
    'xx.matrix', 'yy.matrix' and 'intg.matrix' in the current directory
    (they can be read back with the matrix's fromfile()).
    """

    def __init__(self, data, gamma, sigma_q):
        """Build all kernel matrices for `data`.

        data    -- sequence of at least two samples
        gamma   -- kernel width
        sigma_q -- stored as self.sigma (it used to be ignored in favour of a
                   hard-coded .5)

        Raises ValueError when fewer than two samples are supplied, since no
        (x, y) pair can be formed and the normalisation would divide by zero.
        """
        if len(data) < 2:
            raise ValueError('at least two samples are required')
        # set variables
        self.l = len(data) - 1
        try:
            self.n = len(data[0])
        except TypeError:
            self.n = 1
        self.x = data[:-1]
        self.y = data[1:]
        self.gamma = gamma
        self.sigma = sigma_q

        # calculate xx matrix
        self.xx = self._gram(self.x)
        self._save(self.xx, 'xx.matrix', 'xx')

        # calculate yy matrix
        self.yy = self._gram(self.y)
        self._save(self.yy, 'yy.matrix', 'yy')

        # calculate integration matrix; only the upper triangle is evaluated
        # and mirrored into the lower one
        print('computing integrals...')
        self.intg = matrix(0.0, (self.l, self.l))
        for i in range(self.l):
            for j in range(i, self.l):
                val = self.int(i, j)
                self.intg[i, j] = val
                self.intg[j, i] = val
        self._save(self.intg, 'intg.matrix', 'intg')

    def _gram(self, points):
        """Return the symmetric l x l kernel matrix of `points`, scaled so that its entries sum to l."""
        gram = matrix(0.0, (self.l, self.l))
        for i in range(self.l):
            for j in range(i, self.l):
                val = self._calc(points[i], points[j])
                gram[i, j] = val
                gram[j, i] = val
        # normalize
        gram /= (sum(gram) / self.l)
        return gram

    def _save(self, mat, path, label):
        """Write `mat` to `path` and report it on stdout."""
        # tofile() emits raw binary data, so open in binary mode; the with
        # block closes the file even if the write fails.
        with open(path, 'wb') as f:
            mat.tofile(f)
        print('%s saved to file' % label)

    def int(self, i, j):
        """Approximate the coordinate-wise integral of the y-kernel up to y[i].

            \\int_{-\\infty}^{y_i^1} ... \\int_{-\\infty}^{y_i^n} K_\\gamma(y', y_j) dy'^1 ... dy'^n

        self.y holds the sample vectors while self.yy holds kernel values.
        The integral is approximated by summing the kernel values K(y_k, y_j)
        over all samples y_k, each weighted by the number of coordinates in
        which y_k is smaller than y_i, and dividing by l * n.
        """
        # Entries l*j .. l*(j+1)-1 in single-index order: column j of yy
        # (equal to row j, since yy is symmetric).  NOTE(review): the loop
        # below writes into yi; this relies on the slice being a copy so that
        # self.yy is left untouched - confirm against the cvxopt matrix docs.
        yi = self.yy[self.l * j:self.l * (j + 1)]
        for n in range(self.l):
            # scale K by how many dimensions of y[n] are below y[i]
            # (this also zeroes out samples that are nowhere below y[i])
            yi[n, 0] = yi[n, 0] * (sum(self.y[n] < self.y[i]))
        # l is the number of samples and n the dimensionality
        return sum(yi) / (self.l * self.n)

    def _calc(self, a, b):
        """Return the kernel value exp(-||a - b|| / gamma)."""
        return math.exp(-linalg.norm((a - b) / self.gamma))
def run():
    """Fit the estimate on the first 20 samples of 'B1.dat' and plot the result.

    Steps: load the data, build the kernel matrices, assemble the quadratic
    program (objective P, q; equality A, b; inequality G, h), solve it with
    cvxopt's qp, store the solution as F.beta, write it to 'beta.matrix' and
    plot F.r(x) against y for the training pairs.
    """
    # Retrieve dataset
    data = getData('B1.dat')[:20]

    # Construct Variables
    K = kernel(data, gamma=.1, sigma_q=.5)
    F = estimate(data[:-1], data[1:], K)

    # Objective Function
    print('constructing objective function...')
    P = mul(K.xx, K.yy)
    q = matrix(0.0, (K.l, 1))

    # Equality Constraint
    print('constructing equality constraints...')
    A = matrix([sum(K.xx[n::K.l] for n in range(K.l))], (1, K.l)) / K.l
    b = matrix(1.0)

    # Inequality Constraint
    print('constructing inequality constraints...')
    G = matrix(0.0, (K.l, K.l))
    for m in range(K.l):
        print("Inequality (%s,n) of %s calculated" % (m, K.l))
        # every l-th element starting at m: row m of K.xx in single-index order
        k = K.xx[m::K.l]
        for n in range(m, K.l):
            # t[s] flags the samples that x[n] exceeds (in every coordinate
            # when the data is multi-dimensional)
            if K.n > 1:
                t = array([min(K.x[n] - K.x[s]) > 0 for s in range(K.l)])
            else:
                t = array([K.x[n] - K.x[s] > 0 for s in range(K.l)])
            integral = K.intg[m, n]
            # Evaluate the entry once and mirror it; it used to be computed
            # twice, including a second full pass through F.xy().
            entry = sum(k * t * integral) / K.l - F.xy(K.x[n], K.y[n])
            G[n, m] = entry
            G[m, n] = entry
    print(G)
    h = matrix(K.sigma, (K.l, 1))

    # Optimize
    print('starting optimization...')
    optimized = qp(P, q, G=G, h=h, A=A, b=b)
    F.beta = optimized['x']
    print(F.beta)
    # tofile() writes raw binary data; the with block guarantees the close
    with open('beta.matrix', 'wb') as f:
        F.beta.tofile(f)
    print('beta saved to file')

    # test on training data
    x_1 = [F.r(K.x[i])[0] for i in range(K.l)]
    y_1 = [K.y[i][0] for i in range(K.l)]
    plot(x_1, label="x'")
    plot(y_1, label="y")
    legend()
    show()
def help():
    """Print usage information and return 0.

    Shows the module docstring when there is one; otherwise a short usage
    line, so the user no longer sees the literal text "None".
    """
    text = __doc__
    if not text:
        text = "usage: %s [-h|--help] [function ...]" % os.path.basename(sys.argv[0])
    print(text)
    return 0
def process(arg='run'):
    """Call the module-level function named `arg` if it is listed in _Functions.

    Names that are not listed are silently ignored.  The called function
    receives no arguments and its return value is discarded.
    """
    if arg not in _Functions:
        return
    handler = globals()[arg]
    handler()
class Usage(Exception):
    """Command-line usage error; `msg` holds the text reported to the user."""

    def __init__(self, msg):
        # Initialise the base class too so that str(err) carries the message.
        Exception.__init__(self, msg)
        self.msg = msg


def main(argv=None):
    """Command-line entry point.

    argv -- full argument vector including the program name; defaults to
            sys.argv.

    Recognised options: -h/--help, -l/--list=, -d/--database= (only the help
    option is acted upon).  Every positional argument is passed to process().

    Returns 2 after reporting a usage error on stderr, 0 after showing help
    for a named function, and None otherwise.
    """
    if argv is None:
        argv = sys.argv
    try:
        try:
            # Parse the argv that was passed in; this used to read sys.argv
            # unconditionally, which made the parameter useless.
            opts, args = getopt.getopt(argv[1:], "hl:d:",
                                       ["help", "list=", "database="])
        except getopt.error as msg:
            raise Usage(msg)
        # process options
        for o, _value in opts:
            if o in ("-h", "--help"):
                for f in _Functions:
                    if f in args:
                        # f is a function *name*.  The old apply(f, ...) tried
                        # to call the string itself and always raised
                        # TypeError.  Show that function's own documentation,
                        # falling back to the general help text.
                        doc = getattr(globals().get(f), '__doc__', None)
                        if doc:
                            print(doc)
                        else:
                            help()
                        return 0
                help()
        # process arguments
        for arg in args:
            process(arg)
    except Usage as err:
        sys.stderr.write("%s\n" % (err.msg,))
        sys.stderr.write("for help use --help\n")
        return 2


if __name__ == "__main__":
    sys.exit(main())