"""
Three examples of how to compute linear regression in Theano.
1) The first example uses Ordinary Least Squares (OLS) to fit the parameters.
2) The second example uses the pseudoinverse to compute the linear least
   squares fit of the parameters.
3) The third example uses a naive gradient descent algorithm to fit the
   parameters. Its main purpose is to illustrate the use of gradients in
   Theano.
"""
import random

import numpy as np
import matplotlib.pyplot as plt

import theano.tensor as T
from theano import function
from theano.sandbox.linalg import ops as linOps

###########################################################
# Data and plot functions
###########################################################
def generateRandomSampleData():
    """
    Generate random data around the line y = b + a*x, with b = 2 and a = 0.5.
    Out:
        tuple (xs, ys)
        xs: x-data with intercept term for the regression.
            Each row is one sample.
        ys: y-data.
            Each row is one sample.
        xs[i] corresponds to ys[i].
    """
    x_min = 0
    x_max = 10
    b = 2
    a = 0.5
    sigma = 0.5
    xs = []
    ys = []
    for i in range(20):
        x = random.uniform(x_min, x_max)
        y_mu = b + a * x
        y = random.normalvariate(y_mu, sigma)
        xs.append([1, x])
        ys.append([y])
    return (np.array(xs), np.array(ys))

def plotData(xs, ys, b1, b2, b3):
    # plot data points
    plt.plot(xs[:, 1], ys, 'b+')
    # plot the original line
    x_line = np.linspace(0, 10)
    y_line = 2 + x_line * 0.5
    plt.plot(x_line, y_line, 'b', label='original')
    # plot the line fitted by the first method
    y_line = b1[0][0] + x_line * b1[1][0]
    plt.plot(x_line, y_line, 'y', label='regression 1')
    # plot the line fitted by the second method
    y_line = b2[0][0] + x_line * b2[1][0]
    plt.plot(x_line, y_line, 'g', label='regression 2')
    # plot the line fitted by the third method
    y_line = b3[0][0] + x_line * b3[1][0]
    plt.plot(x_line, y_line, 'r', label='regression 3')
    plt.legend(loc='best')
    plt.show()

###########################################################
# Regression functions
###########################################################
"""
General linear model:
---------------------
y_i = b_0 + b_1 * X_i_1 + b_2 * X_i_2 + ... + b_{p-1} * X_i_{p-1} + e_i
for i = 1 .. n
y = X*b + e
n = number of observations
p = number of input dimensions
X_i_j: nxp matrix (input matrix )
i: row -> observations (1 .. n)
j: column -> input params (0 .. p-1)
X_i_0 is the intercept term (= 1 if an intercept is needed)
y_i: column vector (output vector for observations)
i: row -> observations (1 .. n)
b_j: column vector (regression slopes for each input dimension)
j: row -> input params (0 .. p-1)
b_0 is the intercept if X_i_0 = 1
e_i: column vector (random errors for each observation)
i: row -> observations (1 .. n)
"""
def linearRegression(inputs, outputs):
    """
    Computes the least squares estimator (LSE) B_hat that minimises the sum of
    the squared errors.
    In:
        inputs: Matrix of inputs (X) (nxp matrix)
            format: [[observation_1], ..., [observation_n]]
        outputs: Column vector (Matrix) of outputs y
            format: [[y_1], ..., [y_n]]
    Out:
        B_hat: Column vector (Matrix) of fitted slopes
            format: [[b_0], ..., [b_{p-1}]]
    """
    return linearRegression_1(inputs, outputs)

def linearRegression_1(inputs, outputs):
    """
    Computes the least squares estimator (LSE) B_hat that minimises the sum of
    the squared errors.
    Computes B_hat as B_hat = (X.T . X)^-1 . X.T . y
    -> Ordinary Least Squares (OLS)
    http://en.wikipedia.org/wiki/Ordinary_least_squares
    In:
        inputs: Matrix of inputs (X) (nxp matrix)
            format: [[observation_1], ..., [observation_n]]
        outputs: Column vector (Matrix) of outputs y
            format: [[y_1], ..., [y_n]]
    Out:
        B_hat: Column vector (Matrix) of fitted slopes
            format: [[b_0], ..., [b_{p-1}]]
    """
    X = T.dmatrix('X')
    y = T.dcol('y')
    # B_hat = (X.T . X)^-1 . X.T . y
    # http://deeplearning.net/software/theano/library/sandbox/linalg.html
    # MatrixInverse is the class;
    # matrix_inverse is the function based upon the MatrixInverse class.
    B_hat = T.dot(T.dot(linOps.matrix_inverse(T.dot(X.T, X)), X.T), y)
    lse = function([X, y], B_hat)
    b = lse(inputs, outputs)
    return b
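
def linearRegression_1_check(inputs, outputs):
    """
    Sanity-check sketch: solve the same least squares problem directly with
    NumPy. np.linalg.lstsq solves min ||X.b - y||^2 with an SVD-based routine,
    which avoids forming the explicit inverse (X.T . X)^-1 and behaves better
    when X.T . X is ill-conditioned. Illustrative helper; not one of the three
    Theano examples.
    """
    b, residuals, rank, singular_values = np.linalg.lstsq(inputs, outputs)
    return b
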
def linearRegression_2(inputs, outputs):
    """
    Computes the least squares estimator (LSE) B_hat that minimises the sum of
    the squared errors.
    Computes B_hat as B_hat = X^+ . y,
    with X^+ the pseudoinverse of matrix X.
    http://en.wikipedia.org/wiki/Moore-Penrose_pseudoinverse
    In:
        inputs: Matrix of inputs (X) (nxp matrix)
            format: [[observation_1], ..., [observation_n]]
        outputs: Column vector (Matrix) of outputs y (nx1 matrix)
            format: [[y_1], ..., [y_n]]
    Out:
        B_hat: Column vector (Matrix) of fitted slopes (px1 matrix)
            format: [[b_0], ..., [b_{p-1}]]
    """
    X = T.dmatrix('X')
    y = T.dcol('y')
    # http://deeplearning.net/software/theano/library/sandbox/linalg.html
    # MatrixPinv is the class;
    # pinv is the function based upon the MatrixPinv class.
    # B_hat = X^+ . y
    B_hat = T.dot(linOps.pinv(X), y)
    lse = function([X, y], B_hat)
    b = lse(inputs, outputs)
    return b
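
# The same estimate can be computed in plain NumPy as
#     np.linalg.pinv(inputs).dot(outputs)
# Unlike the normal equations in linearRegression_1, the pseudoinverse is
# defined even when X.T . X is singular (e.g. collinear input columns); it
# then yields the minimum-norm least squares solution.
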
def linearRegression_3(inputs, outputs):
    """
    Computes the least squares estimator (LSE) B_hat that minimises the sum of
    the squared errors.
    Computes B_hat with the use of gradient descent:
    http://en.wikipedia.org/wiki/Gradient_descent
    In:
        inputs: Matrix of inputs (X) (nxp matrix)
            format: [[observation_1], ..., [observation_n]]
        outputs: Column vector (Matrix) of outputs y (nx1 matrix)
            format: [[y_1], ..., [y_n]]
    Out:
        B_hat: Column vector (Matrix) of fitted slopes (px1 matrix)
            format: [[b_0], ..., [b_{p-1}]]
    """
    X = T.dmatrix('X')       # inputs
    z = T.dcol('z')          # targets
    B_hat = T.dcol('B_hat')  # parameter estimates
    # Define the cost function.
    y = T.dot(X, B_hat)      # outputs
    err = z - y              # errors
    cost = (err ** 2).sum()  # the cost to minimise (sum of squared errors)
    # Gradient of the cost with respect to the parameters.
    cost_B_grad = T.grad(cost, [B_hat])
    # Compile the cost function and its gradient.
    cost_fun = function([B_hat, X, z], cost)  # can be used for debugging or early stopping
    cost_grad_fun = function([B_hat, X, z], cost_B_grad)
    # Initialise the parameter estimates randomly.
    b = np.random.randn(inputs.shape[1], 1)
    # Gradient descent
    nb_of_iterations = 500
    step_size = 0.001
    for i in range(nb_of_iterations):
        # cost_grad_fun returns a list with one array per input in [B_hat].
        cost_grad_res = cost_grad_fun(b, inputs, outputs)[0]
        # Update the parameter estimates along the negative gradient.
        b -= step_size * cost_grad_res
    return b
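
# For this cost the gradient has the closed form 2 * X.T . (X.b - z), so inside
# linearRegression_3 the Theano gradient could be cross-checked against it:
#     g_manual = 2 * inputs.T.dot(inputs.dot(b) - outputs)
#     g_theano = cost_grad_fun(b, inputs, outputs)[0]
#     np.allclose(g_manual, g_theano)  # -> True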
###########################################################
# main
###########################################################
def main():
    # Seed both random number generators to make the experiment repeatable
    # (linearRegression_3 uses np.random for its initial parameter estimates).
    random.seed(1)
    np.random.seed(1)
    xs, ys = generateRandomSampleData()
    b1 = linearRegression_1(xs, ys)
    print("b1 parameters found: " + str(b1))
    b2 = linearRegression_2(xs, ys)
    print("b2 parameters found: " + str(b2))
    b3 = linearRegression_3(xs, ys)
    print("b3 parameters found: " + str(b3))
    plotData(xs, ys, b1, b2, b3)


if __name__ == "__main__":
    main()