/
mlp_theano.py
105 lines (80 loc) · 2.68 KB
/
mlp_theano.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
import theano
import theano.tensor as T
import numpy as np
class HiddenLayer(object):
def __init__(self, input, n_in, n_out, W=None, b=None,
activation=T.tanh):
self.input = input
if W is None:
W_values = np.asarray(
np.random.uniform(
low=-np.sqrt(6. / (n_in + n_out)),
high=np.sqrt(6. / (n_in + n_out)),
size=(n_in, n_out)
),
dtype=theano.config.floatX
)
if activation == theano.tensor.nnet.sigmoid:
W_values *= 4
W = theano.shared(value=W_values, name='W', borrow=True)
if b is None:
b_values = np.zeros((n_out,), dtype=theano.config.floatX)
b = theano.shared(value=b_values, name='b', borrow=True)
self.W = W
self.b = b
lin_output = T.dot(input, self.W) + self.b
self.output = (
lin_output if activation is None
else activation(lin_output)
)
# parameters of the model
self.params = {'W':self.W, 'b':self.b}
class MLP(object):
def __init__(self, sizes, input_dim, output_dim):
self.layers = len(sizes) + 1
in_dim = [input_dim] + sizes
out_dim = sizes + [output_dim]
x = T.dvector('x')
y = T.dvector('y')
self.hyp_params = []
for i, (r,c) in enumerate(zip(in_dim,out_dim)):
if i == 0:
obj = HiddenLayer(x, r, c)
else:
obj = HiddenLayer(obj.output,r,c)
self.hyp_params.append(obj.params)
yhat = obj.output
prediction = T.argmax(yhat)
self.predict = theano.function([x],[yhat])
o_error = T.sum(T.sqr(yhat - y))
# o_error = T.sum(T.nnet.categorical_crossentropy(yhat, y))
updates = []
learning_rate = T.scalar('learning_rate')
for param in self.hyp_params:
updates.append((param['W'], param['W'] - learning_rate * T.grad(o_error,param['W'])))
updates.append((param['b'], param['b'] - learning_rate * T.grad(o_error,param['b'])))
self.train_step = theano.function([x,y,learning_rate],[o_error],
updates = updates)
obj = MLP([2,2],2,1)
theano.printing.pydotprint(obj.predict, outfile="logreg_pydotprint_prediction_mlp_new.png", var_with_name_simple=True)
exit()
x = [[0.0 ,0.0], [0.0, 1.0], [1.0 ,0.0],[1.0 ,1.0]]
y = [[0.0], [1.0], [1.], [0.0]]
# x = np.array([0.0, 1.0])
# print x.shape
# exit()
# y = np.array([.99])
# k = obj.train_step(x,y,0.1)
for i in range(10000):
# print i
for i in range(len(x)):
ix = np.array(x[i])
# print ix.shape
# exit()
iy = np.array(y[i])
loss = obj.train_step(ix,iy,0.1)[0]
print loss
# print obj.predict(np.array([0.0, 0.0]))[0]
# print obj.predict(np.array([0.0, 1.0]))[0]
# print obj.predict(np.array([1.0, 0.0]))[0]
# print obj.predict(np.array([1.0, 1.0]))[0]