from os.path import expanduser
import time
import numpy as np
import pandas as pd
import theano
import theano.tensor as T
from sklearn.utils import shuffle
from sklearn.model_selection import train_test_split  # moved from the deprecated sklearn.cross_validation
from lasagne.objectives import squared_error, aggregate
from lasagne.regularization import regularize_network_params, l2
from lasagne.updates import rmsprop, adagrad, nesterov_momentum
from lasagne.layers import get_output, get_all_params, set_all_param_values, get_all_param_values
from lasagne.init import GlorotUniform
import cPickle as pickle
from skimage.filters import sobel, sobel_h, sobel_v
from skimage.exposure import equalize_hist
class data_set(object):
    """
    Loader and preprocessor for a keypoint-regression dataset stored as CSV:
    each row holds a space-separated 96x96 grayscale image in the 'Image'
    column and keypoint coordinate targets (possibly NaN) in the remaining
    columns.
    """
def __init__(self,
path_train,
path_test):
"""
Get the path to the training and testing data and store them
"""
self.PATH_train = path_train
self.PATH_test = path_test
self._load_data()
def _load_data(self):
"""
Load the training data according to self.PATH_train
Extract X,y and store in self.X_nan,self.y_nan,including nan values
Calculate the alexnet-like mean image and store it in self.meanImageAlex
Calculate the VGG-like mean value, per channel mean, store it in
self.meanImageVGG
"""
self.df = pd.read_csv(expanduser(self.PATH_train))
self.df['Image'] = self.df['Image'].apply(lambda im: np.fromstring(im, sep=' '))
        # tell the user that there are missing values
        print '{} of the {} samples have missing values \n'.format(
            self.df.isnull().any(axis=1).sum(), self.df.shape[0])
        print 'Missing values appear in {} different target columns \n'.format(
            self.df.isnull().any(axis=0).sum())
# extract X,y
self._extract_Xy()
print 'shape of X', self.X.shape, 'and y', self.y.shape
    def _extract_Xy(self, col=None):
        if col is None:
            col = 'Image'
# extract X and y
self.X = np.vstack(self.df[col].values).astype(np.float32)
self.y = self.df[self.df.columns[:-1]].values.astype(np.float32)
def split_trainval(self):
# Shuffle the data
self.X, self.y = shuffle(self.X, self.y)
#######################################
# # scale inputs
# # TODO scaling to test the guide
# self.X = self.X / 255.
# self.y = (self.y - 48) / 48
# TODO zero mean images, 0meanind
# temp = self.X.T - self.X.mean(axis = 1)
# self.X = temp.T
######################################
# reshape
self.X = self.X.reshape(self.X.shape[0], -1, 96, 96).astype(theano.config.floatX)
self.y = self.y.astype(theano.config.floatX)
# train validation split
self.X, self.X_val, self.y, self.y_val = train_test_split(
self.X, self.y, test_size=0.2, random_state=55)
        # Calculate the mean image. Note that X has no missing values here.
        self.meanImageAlex = self.X.mean(axis=0)
        # Calculate the mean value per channel; here we only have one channel.
        self.meanImageVGG = self.X.mean()
    def augment(self):
        # augment by horizontal flipping; this applies to the training set only
        print 'augmenting the training data \n'
        tempX = np.copy(self.X)
        tempX = tempX[:, :, :, ::-1]
        tempy = np.copy(self.y)
        # flip the x-coordinates (even columns) around the image width of 96;
        # note: if the target columns encode left/right landmarks, the
        # corresponding columns are not swapped here
        tempy[:, ::2] = 96 - tempy[:, ::2]
        self.X = np.concatenate((self.X, tempX), axis=0)
        self.y = np.concatenate((self.y, tempy), axis=0)
# Shuffle the data
self.X, self.y = shuffle(self.X, self.y, random_state=47)
        # Calculate the mean image. Note that X has no missing values here.
        self.meanImageAlex = self.X.mean(axis=0)
        # Calculate the mean value per channel; here we only have one channel.
        self.meanImageVGG = self.X.mean()
    def drop_missing_values(self):
        """
        Drop the samples that contain missing values.
        The effect of running this function is irreversible.
        """
        # Drop samples with missing values
        self.df = self.df.dropna()
        # extract X and y
        self._extract_Xy()
        print 'shape of X', self.X.shape, 'and y', self.y.shape
def center_alexnet(self, X=None):
"""
Center according to the mean image calculated using the training data
Alexnet style
:param X: numpy array same number of features as the training data
:return: Centered dataset
"""
        if X is not None:
return X - self.meanImageAlex
else:
self.X = self.X - self.meanImageAlex
self.X_val = self.X_val - self.meanImageAlex
print 'Training data has been centered alexnet style'
def center_VGG(self, X=None):
"""
Center according to the mean image calculated using the training data
VGG style
:param X: numpy array same number of features as the training data
:return: Centered dataset
"""
        if X is not None:
return X - self.meanImageVGG
else:
self.X = self.X - self.meanImageVGG
self.X_val = self.X_val - self.meanImageVGG
print 'Training data has been centered VGG style'
def sobel_image(self):
# replace images with the output of sobel filter on each image
self.df['Image'] = self.df['Image'].apply(lambda im: sobel(im.reshape(96, 96)).reshape(-1))
self._extract_Xy(col='Image')
    def hist_eqal_image(self):
        """Replace each image with its histogram-equalized version."""
        self.df['Image'] = self.df['Image'].apply(lambda im: equalize_hist(im.reshape(96, 96)).reshape(-1))
        self._extract_Xy(col='Image')
    def stack_origi_sobel(self):
        """Stack each original image with its horizontal and vertical Sobel
        responses, giving a 3-channel input of shape (N, 3, 96, 96)."""
        df_preproc = pd.DataFrame(self.df['Image'])
        df_preproc['sobelh'] = df_preproc['Image'].apply(lambda im: sobel_h(im.reshape(96, 96)).reshape(-1))
        df_preproc['sobelv'] = df_preproc['Image'].apply(lambda im: sobel_v(im.reshape(96, 96)).reshape(-1))
        self.X = np.vstack(df_preproc['Image'].values).reshape(-1, 1, 96, 96)
        self.y = self.df[self.df.columns[:-1]].values
        tempx1 = np.vstack(df_preproc['sobelh'].values).reshape(-1, 1, 96, 96)
        tempx2 = np.vstack(df_preproc['sobelv'].values).reshape(-1, 1, 96, 96)
        self.X = np.concatenate((self.X, tempx1, tempx2), axis=1)
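

# ---------------------------------------------------------------------------
# A minimal usage sketch for the data_set class above (not called anywhere in
# this module). The CSV paths are hypothetical placeholders; the ordering
# (load -> split -> augment -> center) follows how the methods are written.
def _example_dataset_pipeline():
    data = data_set(path_train='~/data/training.csv',
                    path_test='~/data/test.csv')
    # data.drop_missing_values()  # optional: keep only complete samples
    data.split_trainval()         # shuffle, reshape to (N, 1, 96, 96), split
    data.augment()                # horizontal flips double the training set
    data.center_alexnet()         # subtract the mean image from train / val
    return data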
def reinitiate_set_params(network, weights=None):
    # Change the weights of a trained network to a random set or to a
    # user-defined value. Useful for big networks and cross validation:
    # instead of the long recompilation time you can just re-initialise
    # the network weights.
    if weights is None:
        old = get_all_param_values(network)
        weights = []
        for layer in old:
            shape = layer.shape
            # GlorotUniform needs at least 2 dims; temporarily pad 1-D
            # params (e.g. biases), then squeeze back to the original shape
            if len(shape) < 2:
                shape = (shape[0], 1)
            W = GlorotUniform()(shape)
            if W.shape != layer.shape:
                W = np.squeeze(W, axis=1)
            weights.append(W)
    set_all_param_values(network, weights)
    return network
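

# A hedged sketch of re-using reinitiate_set_params between cross-validation
# folds; `network` is assumed to be a lasagne output layer built elsewhere.
def _example_reinit(network):
    # snapshot the current parameters, then re-initialise with fresh
    # Glorot-uniform values without recompiling any theano functions
    snapshot = get_all_param_values(network)
    network = reinitiate_set_params(network)
    # ... train the next fold, then restore the saved snapshot if needed
    network = reinitiate_set_params(network, weights=snapshot)
    return network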
def shared_dataset(X, y, borrow=True):
    """
    :param X: array-like to be shared in theano
    :param y: array-like to be shared in theano
    :param borrow: borrow option of theano.shared
    :return: shared versions of X, y
    """
    shared_x = theano.shared(np.asarray(X, dtype=theano.config.floatX),
                             borrow=borrow)
    shared_y = theano.shared(np.asarray(y, dtype=theano.config.floatX),
                             borrow=borrow)
    return shared_x, shared_y
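

# Sketch of moving numpy splits onto the GPU with shared_dataset. The `data`
# object is assumed to come from the data_set pipeline above; NaN targets are
# zero-filled because build_update_functions below masks them out anyway.
def _example_shared(data):
    train_set_x, train_set_y = shared_dataset(data.X, np.nan_to_num(data.y))
    valid_set_x, valid_set_y = shared_dataset(data.X_val, np.nan_to_num(data.y_val))
    return train_set_x, train_set_y, valid_set_x, valid_set_y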
def build_update_functions(train_set_x, train_set_y,
valid_set_x, valid_set_y,
network,
y, X,
train_MASK, val_MASK,
batch_size=32,
l2_reg=.0001,
learning_rate=.005,
momentum=.9):
# build update functions
# extract tensor representing the network predictions
prediction = get_output(network)
    ################################################
    ################## old #########################
    # # collect squared error
    # loss_RMSE = squared_error(prediction, y)
    # # compute the root mean squared error
    # loss_RMSE = loss_RMSE.mean().sqrt()
    ################## new #########################
    # Aggregate the element-wise error into a scalar value using a mask.
    # Note that y should not contain NaN; replace NaNs with 0 or -1 (the
    # value does not matter, as it is not used to calculate the aggregated
    # error or the network updates).
    # The mask should be a matrix of size(y), with 0s in place of NaN values
    # and 1s everywhere else.
    # build tensor variable for mask
    trainMASK = T.matrix('trainMASK')
    # collect squared error
    loss_RMSE = squared_error(prediction, y)
    # drop NaN values and average over the remaining values
    loss_RMSE = aggregate(loss_RMSE, weights=trainMASK, mode='normalized_sum')
    # compute the square root
    loss_RMSE = loss_RMSE.sqrt()
    ###############################################
###############################################
# add l2 regularization
l2_penalty = regularize_network_params(network, l2)
loss = (1 - l2_reg) * loss_RMSE + l2_reg * l2_penalty
# get network params
    params = get_all_params(network, trainable=True)
# # create update criterion
# print('nestrov')
# updates = nesterov_momentum( loss, params, learning_rate=.01, momentum=.9)
# print('AdaGrad')
# updates = adagrad(loss, params,learning_rate= 1e-2)
#
print('RMSPROP \n')
updates = rmsprop(loss, params, learning_rate=learning_rate)
# create validation/test loss expression
# the loss represents the loss for all the labels
test_prediction = get_output(network, deterministic=True)
    ################################################
    ################## old #########################
    # # collect squared error
    # test_loss = squared_error(test_prediction, y)
    # # compute the root mean squared error
    # test_loss = test_loss.mean().sqrt()
    # # test_loss_withl2 = (1-l2_reg) * test_loss + l2_reg * l2_penalty
    ################## new #########################
    # Aggregate the element-wise error into a scalar value using a mask.
    # Note that y should not contain NaN; replace NaNs with 0 or -1 (the
    # value is not used to calculate the aggregated error).
    # The mask should be a matrix of size(y), with 0s in place of NaN values
    # and 1s everywhere else.
    # build tensor variable for mask
    valMASK = T.matrix('valMASK')
    # collect squared error
    test_loss = squared_error(test_prediction, y)
    # drop NaN values and average over the remaining values
    test_loss = aggregate(test_loss, weights=valMASK, mode='normalized_sum')
    # compute the square root
    test_loss = test_loss.sqrt()
    ################################################
################################################
# index for mini-batch slicing
index = T.lscalar()
# training function
train_set_x_size = train_set_x.get_value().shape[0]
val_set_x_size = valid_set_x.get_value().shape[0]
train_fn = theano.function(inputs=[index],
outputs=[loss, loss_RMSE],
updates=updates,
givens={X: train_set_x[
index * batch_size: T.minimum((index + 1) * batch_size, train_set_x_size)],
y: train_set_y[
index * batch_size: T.minimum((index + 1) * batch_size, train_set_x_size)],
trainMASK: train_MASK[index * batch_size: T.minimum((index + 1) * batch_size,
train_set_x_size)]})
    # validation function; note that the deterministic test_prediction is
    # returned, not the stochastic training-mode prediction
    val_fn = theano.function(inputs=[index],
                             outputs=[test_loss, test_prediction],
                             givens={X: valid_set_x[
                                 index * batch_size: T.minimum((index + 1) * batch_size, val_set_x_size)],
                                     y: valid_set_y[
                                 index * batch_size: T.minimum((index + 1) * batch_size, val_set_x_size)],
                                     valMASK: val_MASK[
                                 index * batch_size: T.minimum((index + 1) * batch_size, val_set_x_size)]})
return train_fn, val_fn
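

# Sketch of building the 0/1 masks that build_update_functions expects: 0
# where a target value is NaN, 1 everywhere else, shared on the GPU like y.
# `y_train` / `y_val` are assumed to be the raw, NaN-containing target arrays.
def _example_masks(y_train, y_val):
    train_MASK = theano.shared(
        (~np.isnan(y_train)).astype(theano.config.floatX), borrow=True)
    val_MASK = theano.shared(
        (~np.isnan(y_val)).astype(theano.config.floatX), borrow=True)
    return train_MASK, val_MASK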
def early_stop_train(train_set_x,train_set_y,
valid_set_x,valid_set_y,
network,train_fn,val_fn,batch_size = 32):
"""Get the network and update functions as input and apply early stop training.
Should return a trained network with training history.
----------------------
Input
----------------------
    :train_set_x: Training samples, loaded to GPU by theano.shared()
    :valid_set_x: Validation samples, loaded to GPU by theano.shared()
    :train_set_y: Training outputs, loaded to GPU by theano.shared()
    :valid_set_y: Validation outputs, loaded to GPU by theano.shared()
    :network: Deep model, the output layer of the network built using lasagne
:train_fn: theano.function to update the network
:val_fn: theano.function to calculate validation loss
----------------------
Outputs
----------------------
train_loss_history
val_loss_history_
network
----------------------
"""
    # network training meta-parameters
    # TODO: when testing hyper-parameters, lower n_iter (e.g. to 400)
    n_iter = 2000
    improvement_threshold = 0.998
    patience = 40000
    # ceil-division so a final partial mini-batch is counted exactly once
    n_train_batches = (train_set_x.get_value(borrow=True).shape[0] + batch_size - 1) // batch_size
    n_valid_batches = (valid_set_x.get_value(borrow=True).shape[0] + batch_size - 1) // batch_size
    patience_increase = 1.3
    validation_frequency = min(n_train_batches, patience // 10)
    print 'validation_frequency', validation_frequency
train_loss_history_temp = []
best_val_loss_ = np.inf
epoch = 0
done_looping = False
train_loss_history_ = []
val_loss_history_ = []
print 'start training'
print 'shape training', train_set_x.get_value(borrow=True).shape, '\n'
print 'shape validation', valid_set_x.get_value(borrow=True).shape, '\n'
start_time = time.time()
while (epoch < n_iter) and (not done_looping):
epoch += 1
# go over mini-batches for a full epoch
for minibatch_index in range(n_train_batches):
# update network for one mini-batch
minibatch_average_cost, minibatch_average_RMSE = train_fn(minibatch_index)
# store training loss of mini-batches till the next validation step
train_loss_history_temp.append(minibatch_average_RMSE)
# number of mini-batches checked
num_minibatch_checked = (epoch - 1) * n_train_batches + minibatch_index
# if validation interval reached
if (num_minibatch_checked + 1) % validation_frequency == 0:
# compute validation loss
validation_losses = [val_fn(i)[0] for i in range(n_valid_batches)]
# store mean validation loss for validation set
current_val_loss = np.mean(validation_losses)
# store training and validation history
train_loss_history_.append(np.mean(train_loss_history_temp))
val_loss_history_.append(current_val_loss)
train_loss_history_temp = []
# is it the best validation loss so far?
if current_val_loss < best_val_loss_:
# increase patience if improvement is significant
if (current_val_loss < best_val_loss_ * improvement_threshold):
patience = max(patience, num_minibatch_checked * patience_increase)
                    # save the best-so-far validation RMSE, epoch, and model params
                    best_val_loss_ = current_val_loss
                    best_epoch_ = epoch
                    best_network_params = get_all_param_values(network)
                    # save the best model as a pickle file
                    pickle.dump([best_val_loss_, best_epoch_,
                                 train_loss_history_, val_loss_history_, network],
                                open("results.p", "wb"))
# check if patience exceeded and set the training loop to stop
if (patience <= num_minibatch_checked):
print 'patience reached \n'
# reset the network weights to the best params saved
print 'resetting the network params to that of the best seen \n'
reinitiate_set_params(network=network,
weights=best_network_params)
# done optimising, break the optimisation loop
done_looping = True
break
freq = 1
if (epoch % freq) == 0:
            print (('epoch %i, val_loss %f, train_loss %f, best_val_loss %f, patience %i, %f secs\n') %
                   (epoch, current_val_loss, train_loss_history_[-1], best_val_loss_, patience, time.time() - start_time))
start_time = time.time()
    return train_loss_history_, val_loss_history_, network
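

# End-to-end sketch tying the helpers in this module together. `build_model`
# is a hypothetical factory returning a lasagne network for (None, 1, 96, 96)
# input; nothing below is meant as the definitive training script.
def _example_training_run(build_model):
    X = T.tensor4('X')
    y = T.matrix('y')
    data = _example_dataset_pipeline()
    train_x, train_y, val_x, val_y = _example_shared(data)
    train_MASK, val_MASK = _example_masks(data.y, data.y_val)
    network = build_model(X)
    train_fn, val_fn = build_update_functions(train_x, train_y,
                                              val_x, val_y,
                                              network, y, X,
                                              train_MASK, val_MASK)
    return early_stop_train(train_x, train_y, val_x, val_y,
                            network, train_fn, val_fn)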