from __future__ import annotations
import math
import scipy.optimize
import numpy.random
import matplotlib.pyplot
from typing import Union, List
from operator import add, mul
from math import sqrt
from functools import total_ordering
from random import sample
@total_ordering
class Scalar:
def __init__(self: Scalar, val: float):
self.val = float(val)
def __mul__(self: Scalar, other: Union[Scalar, Vector]) -> Union[Scalar, Vector]:
if isinstance(other, Scalar):
return Scalar(self.val * other.val)
elif isinstance(other, Vector):
return Vector(*[i * self.val for i in iter(other)])
else:
raise TypeError("{wrongType} should be either Scalar or Vector".format(wrongType=type(other)))
def __add__(self: Scalar, other: Scalar) -> Scalar:
return Scalar(self.val + other.val)
    def __sub__(self: Scalar, other: Scalar) -> Scalar:
        return Scalar(self.val - other.val)
def __truediv__(self: Scalar, other: Scalar) -> Scalar:
return Scalar(self.val / other.val)
    # Reflected division: invoked for Vector / Scalar, since Vector defines no __truediv__
    def __rtruediv__(self: Scalar, other: Vector) -> Vector:
        return Vector(*[i / self.val for i in iter(other)])
def __repr__(self: Scalar) -> str:
return "Scalar(%r)" % self.val
def sign(self: Scalar) -> int:
return -1 if self.val < 0 else 1 if self.val > 0 else 0
def __float__(self: Scalar) -> float:
return self.val
def __lt__(self: Scalar, other: Scalar):
return self.val < other.val
def __eq__(self: Scalar, other: Scalar):
return self.val == other.val
@total_ordering
class Vector:
    def __init__(self: Vector, *entries: float):
        self.entries = entries
@staticmethod
def zero(size: int) -> Vector:
return Vector(*[0 for i in range(size)])
def __add__(self: Vector, other: Vector) -> Vector:
return Vector(*list(map(add, self.entries, other.entries)))
def __sub__(self: Vector, other: Vector) -> Vector:
return self + Scalar(-1) * other
def __mul__(self: Vector, other: Vector) -> Scalar:
return Scalar(sum(list(map(mul, self.entries, other.entries))))
def magnitude(self: Vector) -> Scalar:
return Scalar(sqrt(sum([i**2 for i in iter(self)])))
def unit(self: Vector) -> Vector:
return self / self.magnitude()
def __len__(self: Vector) -> int:
return len(self.entries)
def __repr__(self: Vector) -> str:
return "Vector%s" % repr(self.entries)
def __iter__(self: Vector):
return iter(self.entries)
def __lt__(self: Vector, other: Vector):
return self.magnitude() < other.magnitude()
def __eq__(self: Vector, other: Vector):
return self.magnitude() == other.magnitude()
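# A few illustrative sanity checks of the Scalar/Vector algebra above
# (not part of the original tasks; all values are hand-computed)
assert float(Scalar(2) * Scalar(3)) == 6.0
assert (Scalar(2) * Vector(1.0, -1.0)).entries == (2.0, -2.0)
assert float(Vector(3.0, 4.0).magnitude()) == 5.0
assert (Vector(3.0, 4.0) - Vector(1.0, 1.0)).entries == (2.0, 3.0)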
class Perceptron:
def __init__(self: Perceptron, d: int):
self.D = d
# Weights
self.w = Vector.zero(d)
# Bias
self.b = Scalar(0)
# Cached weights and bias
self.u = Vector.zero(d)
self.B = Scalar(0)
def clear(self: Perceptron):
self.w = Vector.zero(self.D)
self.b = Scalar(0)
self.u = Vector.zero(self.D)
self.B = Scalar(0)
def train(self: Perceptron, x: List[Vector], y: List[Scalar], iters: int):
for it in range(iters):
for i in range(len(x)):
a = (self.w * x[i]) + self.b
if (a * y[i]).sign() <= 0:
self.w += y[i] * x[i]
self.b += y[i]
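    # The cached u and B below implement the standard averaging trick: the
    # final w - u/c and b - B/c equal the average of all intermediate weights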
def averaged(self: Perceptron, x: List[Vector], y: List[Scalar], iters: int):
c = 1
for it in range(iters):
for i in range(len(x)):
a = (self.w * x[i]) + self.b
if (a * y[i]).sign() <= 0:
self.w += y[i] * x[i]
self.b += y[i]
self.u += y[i] * Scalar(c) * x[i]
self.B += y[i] * Scalar(c)
c += 1
self.w -= Scalar(1 / c) * self.u
self.b -= Scalar(1 / c) * self.B
    def test(self: Perceptron, x: List[Vector], y: List[Scalar]) -> float:
        correct = 0  # avoid shadowing the builtin sum()
        for i in range(len(x)):
            correct += 1 if ((self.w * x[i] + self.b) * y[i]).sign() > 0 else 0
        return correct / len(x)
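# Illustrative check: on a trivially separable pair of points, the perceptron
# should reach 100% training accuracy (toy data made up for this check)
_p = Perceptron(2)
_p.train([Vector(1.0, 1.0), Vector(-1.0, -1.0)], [Scalar(1), Scalar(-1)], 10)
assert _p.test([Vector(1.0, 1.0), Vector(-1.0, -1.0)], [Scalar(1), Scalar(-1)]) == 1.0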
# Draw the decision boundary, given weights vector w and bias b:
# the boundary is the hyperplane perpendicular to w, shifted from the origin
# along w by -b / |w|, i.e. opposite to w for positive b [ Daumé 4.3 ]
def draw_decision_boundary(w, b, col):
    norm = math.hypot(w[0], w[1])
    # Point on the boundary closest to the origin: -(b / |w|) * unit(w)
    x0 = -math.cos(math.atan2(w[1], w[0])) * b / norm
    y0 = -math.sin(math.atan2(w[1], w[0])) * b / norm
    # Second point: step from (x0, y0) along (-w[1], w[0]), which is
    # perpendicular to w and therefore lies along the boundary
    matplotlib.pyplot.axline(xy1=(x0, y0), xy2=(x0 - w[1], y0 + w[0]), color=col)
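# Worked example: for w = (0, 1) and b = -2 the boundary w·x + b = 0 is the
# horizontal line y = 2; its closest point to the origin is (0, 2), i.e. the
# origin shifted along unit(w) by -b / |w| = 2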
# Task #1
def f(x):
    return numpy.sum(x ** 2)
def df(x):
    return 2 * x
# Use the same random starting point for both runs so the comparison is fair
x0 = numpy.random.randint(-1000, 1000)
withJac = scipy.optimize.minimize(f, x0, jac=df)
woJac = scipy.optimize.minimize(f, x0, jac=False)
# With an analytic gradient, minimize() skips finite-difference gradient
# estimates and usually converges in fewer iterations (nit)
print("scipy.optimize.minimize() performs better %s gradient vector function" % ("WITH" if withJac.get('nit') < woJac.get('nit') else "WITHOUT"))
# Task #2
def hinge_loss_surrogate(y_gold, y_pred):
    return max(0.0, 1.0 - y_gold * y_pred)
def pNorm(w, p):
    # |w_i| must be taken before raising to the power p: fabs(w_i ** p) is
    # wrong for non-integer p and negative entries
    wSum = 0.0
    for i in range(len(w)):
        wSum += math.fabs(w[i]) ** p
    return wSum ** (1.0 / p)
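# Hand-computed spot checks for the two helpers above (illustrative only)
assert hinge_loss_surrogate(1.0, 2.0) == 0.0     # well past the margin: no loss
assert hinge_loss_surrogate(1.0, 0.0) == 1.0     # on the decision boundary
assert abs(pNorm([3.0, -4.0], 2) - 5.0) < 1e-12  # Euclidean norm of (3, -4)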
# txs and tys are passed in as arguments, to avoid using global variables
def svm_loss(wb, C, D, txs, tys):
    loss_sum = 0.0
    for i in range(len(tys)):
        loss_sum += hinge_loss_surrogate(tys[i], numpy.dot(wb[:D], txs[i]) + wb[D])
    # Standard soft-margin objective: (1/2) * ||w||^2 + C * sum of hinge losses
    # (the squared norm matches the w term in gradient_svm_loss below)
    return pNorm(wb[:D], 2) ** 2 / 2 + C * loss_sum
# Task #3
def svm(D, txs, tys):
    svmwb = scipy.optimize.minimize(svm_loss, numpy.zeros(D + 1), args=(.25, D, txs, tys), jac=False).x
    # Mark the tip of the weights vector with a green dot
    matplotlib.pyplot.scatter(svmwb[0], svmwb[1], marker='o', color='green')
    draw_decision_boundary(svmwb[:2], svmwb[2], 'green')
# Task #4
# Subgradient of hinge_loss_surrogate with respect to y_pred:
# 0 where the hinge is inactive, -y_gold where it is active
def gradient_hinge_loss_surrogate(y_gold, y_pred):
    if hinge_loss_surrogate(y_gold, y_pred) == 0.0:
        return 0.0
    else:
        return -y_gold
# Gradient of svm_loss with respect to wb = (w, b), by the chain rule through
# y_pred = w . x + b:  d/dw = w + C * sum_i g_i * x_i  and  d/db = C * sum_i g_i
def gradient_svm_loss(wb, C, D, txs, tys):
    grad_w = numpy.array(wb[:D], dtype=float)  # copy, so wb is not mutated in place
    grad_b = 0.0
    for i in range(len(tys)):
        g = gradient_hinge_loss_surrogate(tys[i], numpy.dot(wb[:D], txs[i]) + wb[D])
        grad_w += C * g * txs[i]
        grad_b += C * g
    return numpy.concatenate((grad_w, numpy.array([grad_b])))
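# Optional gradient check (illustrative): compare the analytic gradient with
# scipy's finite-difference estimate at an arbitrary point; _wb, _txs and
# _tys below are made-up toy values used only for this check
_wb = numpy.array([0.3, -0.2, 0.1])
_txs = numpy.array([[1.0, 2.0], [-1.0, 0.5]])
_tys = numpy.array([1.0, -1.0])
_num = scipy.optimize.approx_fprime(_wb, svm_loss, 1e-6, 0.25, 2, _txs, _tys)
assert numpy.allclose(gradient_svm_loss(_wb, 0.25, 2, _txs, _tys), _num, atol=1e-4)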
def svm_grad(D, txs, tys, use_gradient):
    x0 = numpy.array([.01, -.01, .01])
    if use_gradient:
        # Supplying jac= lets the optimizer use the analytic gradient directly
        # instead of estimating it by finite differences
        svmwb = scipy.optimize.minimize(svm_loss, x0, args=(.05, D, txs, tys), jac=gradient_svm_loss).x
    else:
        svmwb = scipy.optimize.minimize(svm_loss, x0, args=(.05, D, txs, tys), jac=False).x
    matplotlib.pyplot.scatter(svmwb[0], svmwb[1], marker='o', color='blue')
    draw_decision_boundary(svmwb[:2], svmwb[2], 'blue')
# Task #5
samples_count = 20
# Create two isolated clusters of points in 2 dimensions
x_plus = numpy.random.normal(loc=[1, 1], scale=0.5, size=(samples_count, 2))
x_minus = numpy.random.normal(loc=[-1, -1], scale=0.5, size=(samples_count, 2))
# Graph the hyperplane found by training an averaged perceptron
per = Perceptron(2)
# Function to convert numpy ndarrays to list of vectors
def ndarrToListOf2dVectors(a: numpy.ndarray) -> List[Vector]:
return [Vector(*i) for i in a.tolist()]
# Convert the numpy ndarrays to lists of Vectors so that our pure-Python
# perceptron can handle them. They go into a single list: positive examples
# first, followed by the negative ones
xs = ndarrToListOf2dVectors(x_plus) + ndarrToListOf2dVectors(x_minus)
# Build the list of labels for the perceptron. Flipping the sign of the labels
# would make the trained weights vector point in the opposite direction
ys = [Scalar(.25)] * samples_count + [Scalar(-.25)] * samples_count
# Train the averaged perceptron in 10 rounds of 25 epochs each, reshuffling
# the training data before each round
for i in range(10):
ys, xs = [list(t) for t in zip(*sample(list(zip(ys, xs)), samples_count * 2))]
per.averaged(xs, ys, 25)
# Graph data for the averaged perceptron:
# mark the direction of the weights vector with a red dot
matplotlib.pyplot.scatter(per.w.entries[0], per.w.entries[1], marker='o', color='red')
draw_decision_boundary(per.w.entries, per.b.val, 'red')
# Graph data for support vector machine without gradient
svm(2, numpy.concatenate((x_plus, x_minus)), numpy.array([.25 for i in range(samples_count)] + [-.25 for i in range(samples_count)]))
# Graph data for support vector machine with gradient
svm_grad(2, numpy.concatenate((x_plus, x_minus)), numpy.array([.25 for i in range(samples_count)] + [-.25 for i in range(samples_count)]), True)
# Plot input clusters of points
matplotlib.pyplot.scatter(x_plus[:, 0], x_plus[:, 1], marker='+', color='blue')
matplotlib.pyplot.scatter(x_minus[:, 0], x_minus[:, 1], marker='x', color='red')
matplotlib.pyplot.savefig("svm-svm-perceptron.pdf")
matplotlib.pyplot.show()