/
ExtendedBackprop.py
253 lines (235 loc) · 10.6 KB
/
ExtendedBackprop.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
from __future__ import print_function
__author__ = 'lbronner'
from scipy import dot, argmax
from random import shuffle
from math import isnan
from pybrain.supervised.trainers.trainer import Trainer
from pybrain.utilities import fListToString
from pybrain.auxiliary import GradientDescent
class ExtendedBackpropTrainer(Trainer):
"""Trainer that trains the parameters of a module according to a
supervised dataset (potentially sequential) by backpropagating the errors
(through time)."""
def __init__(self, module, dataset=None, learningrate=0.01, lrdecay=1.0,
momentum=0., verbose=False, batchlearning=False,
weightdecay=0.):
"""Create a BackpropTrainer to train the specified `module` on the
specified `dataset`.
The learning rate gives the ratio of which parameters are changed into
the direction of the gradient. The learning rate decreases by `lrdecay`,
which is used to to multiply the learning rate after each training
step. The parameters are also adjusted with respect to `momentum`, which
is the ratio by which the gradient of the last timestep is used.
If `batchlearning` is set, the parameters are updated only at the end of
each epoch. Default is False.
`weightdecay` corresponds to the weightdecay rate, where 0 is no weight
decay at all.
"""
Trainer.__init__(self, module)
self.setData(dataset)
self.verbose = verbose
self.batchlearning = batchlearning
self.weightdecay = weightdecay
self.epoch = 0
self.totalepochs = 0
# set up gradient descender
self.descent = GradientDescent()
self.descent.alpha = learningrate
self.descent.momentum = momentum
self.descent.alphadecay = lrdecay
self.descent.init(module.params)
def train(self):
"""Train the associated module for one epoch."""
assert len(self.ds) > 0, "Dataset cannot be empty."
self.module.resetDerivatives()
errors = 0
ponderation = 0.
shuffledSequences = []
for seq in self.ds._provideSequences():
shuffledSequences.append(seq)
shuffle(shuffledSequences)
for seq in shuffledSequences:
e, p = self._calcDerivs(seq)
errors += e
ponderation += p
if not self.batchlearning:
gradient = self.module.derivs - self.weightdecay * self.module.params
new = self.descent(gradient, errors)
if new is not None:
self.module.params[:] = new
self.module.resetDerivatives()
if self.verbose:
print("Total error: {z: .12g}".format(z=errors / ponderation))
if self.batchlearning:
self.module._setParameters(self.descent(self.module.derivs))
self.epoch += 1
self.totalepochs += 1
return errors / ponderation
def _calcDerivs(self, seq):
"""Calculate error function and backpropagate output errors to yield
the gradient."""
self.module.reset()
for sample in seq:
self.module.activate(sample[0])
error = 0
ponderation = 0.
for offset, sample in reversed(list(enumerate(seq))):
# need to make a distinction here between datasets containing
# importance, and others
target = sample[1]
outerr = target - self.module.outputbuffer[offset]
if len(sample) > 2:
importance = sample[2]
error += 0.5 * dot(importance, outerr ** 2)
ponderation += sum(importance)
self.module.backActivate(outerr * importance)
else:
error += 0.5 * sum(outerr ** 2)
ponderation += len(target)
# FIXME: the next line keeps arac from producing NaNs. I don't
# know why that is, but somehow the __str__ method of the
# ndarray class fixes something,
str(outerr)
self.module.backActivate(outerr)
return error, ponderation
def _checkGradient(self, dataset=None, silent=False):
"""Numeric check of the computed gradient for debugging purposes."""
if dataset:
self.setData(dataset)
res = []
for seq in self.ds._provideSequences():
self.module.resetDerivatives()
self._calcDerivs(seq)
e = 1e-6
analyticalDerivs = self.module.derivs.copy()
numericalDerivs = []
for p in range(self.module.paramdim):
storedoldval = self.module.params[p]
self.module.params[p] += e
righterror, dummy = self._calcDerivs(seq)
self.module.params[p] -= 2 * e
lefterror, dummy = self._calcDerivs(seq)
approxderiv = (righterror - lefterror) / (2 * e)
self.module.params[p] = storedoldval
numericalDerivs.append(approxderiv)
r = list(zip(analyticalDerivs, numericalDerivs))
res.append(r)
if not silent:
print(r)
return res
def testOnData(self, dataset=None, verbose=False):
"""Compute the MSE of the module performance on the given dataset.
If no dataset is supplied, the one passed upon Trainer initialization is
used."""
if dataset == None:
dataset = self.ds
dataset.reset()
if verbose:
print('\nTesting on data:')
errors = []
importances = []
ponderatedErrors = []
for seq in dataset._provideSequences():
self.module.reset()
e, i = dataset._evaluateSequence(self.module.activate, seq, verbose)
importances.append(i)
errors.append(e)
ponderatedErrors.append(e / i)
if verbose:
print(('All errors:', ponderatedErrors))
assert sum(importances) > 0
avgErr = sum(errors) / sum(importances)
if verbose:
print(('Average error:', avgErr))
print(('Max error:', max(ponderatedErrors), 'Median error:',
sorted(ponderatedErrors)[len(errors) / 2]))
return avgErr
def testOnClassData(self, dataset=None, verbose=False,
return_targets=False):
"""Return winner-takes-all classification output on a given dataset.
If no dataset is given, the dataset passed during Trainer
initialization is used. If return_targets is set, also return
corresponding target classes.
"""
if dataset == None:
dataset = self.ds
dataset.reset()
out = []
targ = []
for seq in dataset._provideSequences():
self.module.reset()
for input, target, importance in seq:
res = self.module.activate(input)
out.append(argmax(res))
targ.append(argmax(target))
if return_targets:
return out, targ
else:
return out
def trainUntilConvergence(self, dataset=None, maxEpochs=None, verbose=None,
continueEpochs=10, validationProportion=0.25,
trainingData=None, validationData=None,
convergence_threshold=10):
"""Train the module on the dataset until it converges.
Return the module with the parameters that gave the minimal validation
error.
If no dataset is given, the dataset passed during Trainer
initialization is used. validationProportion is the ratio of the dataset
that is used for the validation dataset.
If the training and validation data is already set, the splitPropotion is ignored
If maxEpochs is given, at most that many epochs
are trained. Each time validation error hits a minimum, try for
continueEpochs epochs to find a better one."""
epochs = 0
if dataset is None:
dataset = self.ds
if verbose is None:
verbose = self.verbose
if trainingData is None or validationData is None:
# Split the dataset randomly: validationProportion of the samples for
# validation.
trainingData, validationData = (
dataset.splitWithProportion(1 - validationProportion))
if not (len(trainingData) > 0 and len(validationData)):
raise ValueError("Provided dataset too small to be split into training " +
"and validation sets with proportion " + str(validationProportion))
self.ds = trainingData
bestweights = self.module.params.copy()
bestverr = self.testOnData(validationData)
bestepoch = 0
self.trainingErrors = []
self.validationErrors = [bestverr]
while True:
trainingError = self.train()
validationError = self.testOnData(validationData)
if isnan(trainingError) or isnan(validationError):
raise Exception("Training produced NaN results")
self.trainingErrors.append(trainingError)
self.validationErrors.append(validationError)
if epochs == 0 or self.validationErrors[-1] < bestverr:
# one update is always done
bestverr = self.validationErrors[-1]
bestweights = self.module.params.copy()
bestepoch = epochs
if maxEpochs != None and epochs >= maxEpochs:
self.module.params[:] = bestweights
break
epochs += 1
if len(self.validationErrors) >= continueEpochs * 2:
# have the validation errors started going up again?
# compare the average of the last few to the previous few
old = self.validationErrors[-continueEpochs * 2:-continueEpochs]
new = self.validationErrors[-continueEpochs:]
if min(new) > max(old):
self.module.params[:] = bestweights
break
lastnew = round(new[-1], convergence_threshold)
if sum(round(y, convergence_threshold) - lastnew for y in new) == 0:
self.module.params[:] = bestweights
break
#self.trainingErrors.append(self.testOnData(trainingData))
self.ds = dataset
if verbose:
print(('train-errors:', fListToString(self.trainingErrors, 6)))
print(('valid-errors:', fListToString(self.validationErrors, 6)))
return self.trainingErrors[:bestepoch], self.validationErrors[:1 + bestepoch]