network.py
import numpy as np
import cPickle
import os
import sys
import collections
import theano
from theano import shared, config
import theano.tensor as T
# In newer Theano versions max pooling lives in theano.tensor.signal.pool
from theano.tensor.signal.downsample import max_pool_2d
from theano.tensor.nnet.conv import conv2d
from theano.tensor.nnet import softmax
from theano.misc.pkl_utils import dump, load
from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams
from math import floor
from PIL import Image

#### Miscellaneous functions
def sigmoid(z):
    """The sigmoid function."""
    return 1.0/(1.0+np.exp(-z))

def ReLU(z):
    """Rectified linear unit: max(0, z), elementwise."""
    return z.clip(0, float('inf'))

def mse(y, a, n):
    """Theano mean-squared error. Used as a cost function in the network."""
    return T.sum(T.sqr(y-a))/n

def cross_entropy(y, a, n):
    """Theano binary cross-entropy cost, averaged over the mini-batch."""
    return T.sum(T.nnet.binary_crossentropy(a, y))/n

def log_likelihood(y, a, n):
    """Theano negative log-likelihood cost for one-hot targets."""
    return -T.sum(T.log(a[y.nonzero()]))/n

def l2_norm(c, weights, n, lam):
    """Add an L2 weight-decay penalty of lam/2 * sum(w^2) to the cost c.
    (`n` is unused; kept for a uniform cost-helper signature.)"""
    l2_sum = 0
    for weight in weights:
        if weight is not None:
            l2_sum = l2_sum + 0.5*T.sum(T.sqr(weight))*lam
    return c + l2_sum
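
# Usage sketch (not in the original file): each cost helper takes the target
# `y`, the network output `a`, and the mini-batch size `n`. For example,
# with `mse`:
#
#   y, a = T.matrix('y'), T.matrix('a')
#   f = theano.function([y, a], mse(y, a, 4.0))
#   f(np.ones((4, 10), dtype=config.floatX),
#     np.zeros((4, 10), dtype=config.floatX))   # -> 10.0 (sum of squares / n)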

class Network(object):
    """
    A convolutional neural network class that implements the stochastic
    gradient descent learning algorithm.
    """
    def __init__(self, layers, mini_batch_size, cost_func=mse, freeze_layers=False):
        self.layers = layers
        self.mini_batch_size = mini_batch_size
        self.cost_func = cost_func
        self.out_funcs = []
        if not isinstance(freeze_layers, collections.Sequence):
            freeze_layers = [False for layer in layers]
        # Theano feedforward function that connects the output of each layer
        # to the input of the next
        v = T.vector()
        out = v.dimshuffle('x', 'x', 'x', 0)
        for layer in layers:
            out = layer.set_inpt(out, training=False)
            self.out_funcs.append(theano.function(inputs=[v], outputs=out))
        self.feedforward = self.out_funcs[-1]
        inpt = T.matrix()
        out = inpt
        for layer in layers:
            out = layer.set_inpt(out, training=True)
        # List of each individual parameter for all the layers
        self.params = []
        self.full_params = []
        self.conv_layers = []
        for k in xrange(len(layers)):
            if layers[k].is_convolutional:
                self.conv_layers += [k]
        for freeze_layer, layer in zip(freeze_layers, layers):
            self.full_params += layer.params
            if freeze_layer:
                # Freeze this layer's parameters by not updating them.
                continue
            self.params += layer.params
        # Theano cost function (target = target output, out = network output)
        target = T.matrix()
        cost = self.cost_func(target, out, mini_batch_size)
        # Apply L2 regularization
        l2_lambda = T.fscalar()
        weights = [layer.w for layer in layers]
        self.fmaps = []
        for weight, layer in zip(weights, layers):
            if isinstance(layer, ConvLayer):
                self.fmaps.append(weight)
        cost = l2_norm(cost, weights, mini_batch_size, l2_lambda)
        grads = [T.grad(cost, param) for param in self.params]
        # Gradient-descent update: param <- param - eta * grad
        eta = T.fscalar()
        i = T.iscalar()
        # Shared storage for one epoch's shuffled data (MNIST-sized:
        # 784 inputs, 10 outputs)
        self.test_data_x = shared(np.zeros((mini_batch_size, 784), config.floatX))
        self.test_data_y = shared(np.zeros((mini_batch_size, 10), config.floatX))
        self.update_params = theano.function(
            inputs=[i, eta, l2_lambda],
            outputs=[cost],
            givens=[
                (inpt, self.test_data_x[i*mini_batch_size:(i+1)*mini_batch_size]),
                (target, self.test_data_y[i*mini_batch_size:(i+1)*mini_batch_size])],
            updates=[(param, param - eta*grad)
                     for (param, grad) in zip(self.params, grads)])

    def evaluate(self, test_data):
        total_correct = 0
        for x, y in zip(test_data[0], test_data[1]):
            guess = np.argmax(self.feedforward(x))
            actual_num = np.argmax(y)
            total_correct += (guess == actual_num)
        return int(total_correct)
    def SGD(self, training_data, epochs, mini_batch_size, eta, l2_lambda=0.00,
            test_data=None):
        rng = np.random.RandomState()
        n = len(training_data[0])
        for iteration in xrange(epochs):
            if test_data is not None:
                num_correct = self.evaluate(test_data)
                print "Evaluation: " + str(num_correct) + " / " + str(len(test_data[0]))
            # Shuffle the training data and load it into the shared variables
            order = range(len(training_data[0]))
            rng.shuffle(order)
            mini_batch_x = [training_data[0][i] for i in order]
            mini_batch_y = [training_data[1][i] for i in order]
            self.test_data_x.set_value(np.asarray(mini_batch_x, dtype=config.floatX))
            self.test_data_y.set_value(np.asarray(mini_batch_y, dtype=config.floatX))
            # Update the parameters for each mini-batch
            for i in xrange(int(floor(len(mini_batch_x)/mini_batch_size))):
                cost = self.update_params(i, eta, l2_lambda)
                if i == 0:
                    print cost
                if not np.isfinite(cost):
                    print 'Cost function became non-finite. Aborting...'
                    sys.exit()
        if test_data is not None:
            num_correct = self.evaluate(test_data)
            print "Evaluation: " + str(num_correct) + " / " + str(len(test_data[0]))
    def save_params(self, filename):
        cwd = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(cwd, filename+'.zip'), 'wb') as f:
            params = [param.get_value() for param in self.full_params]
            dump(params, f)

    def load_params(self, filename):
        cwd = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(cwd, filename+'.zip'), 'rb') as f:
            params = load(f)
        for i, param in enumerate(params):
            self.full_params[i].set_value(param)

    def save_feature_maps(self, filename):
        cwd = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(cwd, filename+'.zip'), 'wb') as f:
            params = []
            for k in self.conv_layers:
                params += [param.get_value() for param in self.layers[k].params]
            dump(params, f)

    def load_feature_maps(self, filename, src_layer, src_fmap_nums, dst_layer, dst_fmap_nums):
        """Copy individual feature maps from a parameter file written by
        save_params into the weights and biases of dst_layer."""
        cwd = os.path.dirname(os.path.realpath(__file__))
        with open(os.path.join(cwd, filename+'.zip'), 'rb') as f:
            params = load(f)
        curr_fmaps = self.layers[dst_layer].w.get_value()
        curr_biases = self.layers[dst_layer].b.get_value()
        src_w_param_num = self.full_params.index(self.layers[src_layer].w)
        src_b_param_num = self.full_params.index(self.layers[src_layer].b)
        for src_fmap_num, dst_fmap_num in zip(src_fmap_nums, dst_fmap_nums):
            curr_fmaps[dst_fmap_num] = params[src_w_param_num][src_fmap_num]
            curr_biases[dst_fmap_num] = params[src_b_param_num][src_fmap_num]
        self.layers[dst_layer].w.set_value(curr_fmaps)
        self.layers[dst_layer].b.set_value(curr_biases)
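
# Usage sketch (not in the original file): transplant feature maps between
# networks via a parameter file written by save_params. This copies source
# maps 0-4 of layer 0 into destination maps 5-9 of layer 0:
#
#   net.save_params('pretrained')
#   net2.load_feature_maps('pretrained', src_layer=0, src_fmap_nums=range(5),
#                          dst_layer=0, dst_fmap_nums=range(5, 10))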

# Module-level RNG used for weight initialization in the layer classes below
rng = np.random.RandomState()

class ConvLayer(object):
    """Used to create a convolutional layer."""
    def __init__(self, filter_shape, image_shape, border_mode='valid',
                 unique_weights_per_input=False, dropout_rate=0.0,
                 activation_fn=sigmoid):
        """
        `filter_shape`: a tuple of length 4, whose entries are
            0: the number of filters,
            1: the number of input feature maps,
            2: the filter height, and
            3: the filter width.
        `image_shape`: a tuple of length 4, whose entries are
            0: the mini-batch size,
            1: the number of input feature maps,
            2: the image height, and
            3: the image width.
        """
        self.filter_shape = filter_shape
        self.image_shape = image_shape
        self.activation_fn = activation_fn
        self.dropout_rate = dropout_rate
        self.unique_weights_per_input = unique_weights_per_input
        self.border_mode = border_mode
        self.nrg = RandomStreams()
        self.is_convolutional = True
        # Initialize weights and biases
        n_outputs = image_shape[0]*image_shape[2]*image_shape[3]
        self.b = shared(rng.normal(0.5, 0.5, self.filter_shape[0]).astype(config.floatX))
        if self.unique_weights_per_input:
            # One full set of filter weights per input feature map
            self.w = shared(rng.normal(0, 1.0/(n_outputs*(1-self.dropout_rate)),
                                       filter_shape).astype(config.floatX))
            self.params = [self.w, self.b]
        else:
            # A single filter per output map, shared across input maps, plus
            # a learned per-input-map mixing coefficient
            self.w = shared(rng.normal(0, 1.0/(n_outputs*(1-self.dropout_rate)),
                                       (filter_shape[0], 1, filter_shape[2], filter_shape[3])
                                       ).astype(config.floatX),
                            broadcastable=(False, True, False, False))
            self.inp_fmap_coeff_w = shared(rng.normal(0, 1.0,
                                                      (filter_shape[0], filter_shape[1], 1, 1)
                                                      ).astype(config.floatX),
                                           broadcastable=(False, False, True, True))
            self.params = [self.w, self.inp_fmap_coeff_w, self.b]

    def set_inpt(self, inpt, training):
        # Reshape to (batch, feature maps, height, width); a single image
        # at evaluation time
        if training:
            new_shape = self.image_shape
        else:
            new_shape = (1,)+self.image_shape[1:]
        self.inpt = inpt.reshape(new_shape)
        # Extend w for all input maps
        if self.unique_weights_per_input:
            full_w = self.w
        else:
            full_w = self.w*self.inp_fmap_coeff_w
        if training:
            # Bernoulli mask for dropout on the input
            bern = self.nrg.binomial(size=self.image_shape,
                                     p=1.0-self.dropout_rate).astype(config.floatX)
            self.inpt = self.inpt*bern
        else:
            # Rescale at evaluation time to match the dropout expectation
            self.inpt = self.inpt*(1-self.dropout_rate)
        conv_out = conv2d(self.inpt, full_w, image_shape=new_shape,
                          filter_shape=self.filter_shape, border_mode=self.border_mode)
        self.output = conv_out + self.b.dimshuffle(('x', 0, 'x', 'x'))
        return self.activation_fn(self.output)
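
# Instantiation sketch (not in the original file): 20 5x5 filters over
# single-channel 28x28 images in mini-batches of 10. With the default
# 'valid' border mode, the output shape is (10, 20, 24, 24):
#
#   conv = ConvLayer(filter_shape=(20, 1, 5, 5),
#                    image_shape=(10, 1, 28, 28), activation_fn=ReLU)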

class PoolLayer(object):
    """Used to create a pooling layer."""
    def __init__(self, image_shape, pool_size, pool_mode='max'):
        self.image_shape = image_shape
        self.pool_size = pool_size
        self.pool_mode = pool_mode
        self.is_convolutional = False
        self.params = []
        self.w = None

    def set_inpt(self, inpt, training):
        if training:
            new_shape = self.image_shape
        else:
            new_shape = (1,)+self.image_shape[1:]
        self.inpt = inpt.reshape(new_shape)
        # images2neibs is not yet fully implemented in Theano, so use
        # max_pool_2d instead
        return max_pool_2d(self.inpt, self.pool_size, ignore_border=False,
                           st=None, padding=(0, 0), mode=self.pool_mode)
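
# Shape sketch (not in the original file): pooling a (10, 20, 24, 24)
# convolutional output with 2x2 windows halves the spatial dimensions:
#
#   pool = PoolLayer(image_shape=(10, 20, 24, 24), pool_size=(2, 2))
#   # output shape: (10, 20, 12, 12)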

class FullyConnectedLayer(object):
    """Used to create a fully connected layer of neurons."""
    def __init__(self, n_inputs, n_outputs, dropout_rate=0.0, activation_fn=sigmoid):
        self.n_inputs = n_inputs
        self.n_outputs = n_outputs
        self.dropout_rate = dropout_rate
        self.activation_fn = activation_fn
        self.is_convolutional = False
        # Initialize weights and biases with samples from a Gaussian
        self.w = shared(rng.normal(0, 1.0/(self.n_outputs*(1-self.dropout_rate)),
                                   (self.n_outputs, self.n_inputs)).astype(config.floatX),
                        borrow=True)
        self.b = shared(rng.normal(0, 1.0, (self.n_outputs,)).astype(config.floatX),
                        borrow=True)
        self.params = [self.w, self.b]
        self.nrg = RandomStreams()

    def set_inpt(self, inpt, training):
        self.inpt = T.flatten(inpt, 2)
        if training:
            # Bernoulli mask for dropout on the input
            bern = self.nrg.binomial(size=T.shape(self.inpt),
                                     p=1.0-self.dropout_rate,
                                     ndim=2).astype(config.floatX)
            self.inpt = self.inpt*bern
        else:
            # Rescale at evaluation time to match the dropout expectation
            self.inpt = self.inpt*(1-self.dropout_rate)
        self.output = T.dot(self.w, self.inpt.T).T + self.b.dimshuffle('x', 0)
        return self.activation_fn(self.output)
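
if __name__ == '__main__':
    # Usage sketch (not in the original file): a small MNIST-style network.
    # `load_mnist()` is a hypothetical loader returning (x, y) tuples where
    # x has shape (n, 784) and y holds one-hot labels of shape (n, 10);
    # substitute your own data pipeline.
    mini_batch_size = 10
    net = Network([
        ConvLayer(filter_shape=(20, 1, 5, 5),
                  image_shape=(mini_batch_size, 1, 28, 28),
                  activation_fn=ReLU),
        PoolLayer(image_shape=(mini_batch_size, 20, 24, 24), pool_size=(2, 2)),
        FullyConnectedLayer(n_inputs=20*12*12, n_outputs=10)],
        mini_batch_size, cost_func=cross_entropy)
    # training_data, test_data = load_mnist()
    # net.SGD(training_data, epochs=30, mini_batch_size=mini_batch_size,
    #         eta=0.1, l2_lambda=0.0001, test_data=test_data)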