-
Notifications
You must be signed in to change notification settings - Fork 0
/
train_ft.py
149 lines (122 loc) · 5.86 KB
/
train_ft.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
from __future__ import print_function
import argparse
import chainer
import chainer.links as L
import chainer.functions as F
from chainer import training
from chainer.training import extensions
import image_dataset
import alexnet
import pickle
import numpy as np
class TestModeEvaluator(extensions.Evaluator):
    """Evaluator that runs the model with its ``train`` flag switched off.

    Old-style Chainer models change behavior (e.g. dropout) based on a
    ``train`` attribute on the predictor; this evaluator clears it for the
    duration of evaluation and restores it afterwards.
    """

    def evaluate(self):
        """Evaluate the 'main' target in test mode and return the results."""
        target = self.get_target('main')
        target.predictor.train = False
        results = super(TestModeEvaluator, self).evaluate()
        target.predictor.train = True
        return results
class DelGradient(object):
    """Optimizer hook that zeroes the gradients of selected parameters.

    Any parameter whose full name contains one of the given substrings has
    its gradient multiplied by zero on every update, effectively freezing
    those layers during fine-tuning.
    """

    name = 'DelGradient'

    def __init__(self, delTgt):
        # Substrings identifying parameters to freeze, e.g. ["conv1"].
        self.delTgt = delTgt

    def __call__(self, opt):
        for param_name, param in opt.target.namedparams():
            # Guard clause: skip parameters that match no target substring.
            if not any(tgt in param_name for tgt in self.delTgt):
                continue
            grad = param.grad
            # Zero on the device that owns the gradient array.
            with chainer.cuda.get_device(grad):
                grad *= 0
def copy_model(src, dst):
    """Copy parameters of ``src`` into ``dst``, link by link.

    A child link is copied only when a link with the same name and exact
    same type exists in ``dst`` and every parameter matches by name and
    shape; otherwise it is skipped with a message. This lets a pretrained
    network be partially transferred into a model whose head (e.g. final
    layer size) differs.

    Note: the copy aliases the source arrays (``b[1].data = a[1].data``)
    rather than cloning them, so src and dst share parameter storage
    afterwards.
    """
    assert isinstance(src, chainer.Chain)
    assert isinstance(dst, chainer.Chain)
    for child in src.children():
        # Skip links that have no same-named counterpart in dst.
        if child.name not in dst.__dict__: continue
        dst_child = dst[child.name]
        # Require the exact same link type (not just a common base class).
        if type(child) != type(dst_child): continue
        if isinstance(child, chainer.Chain):
            # Nested chain: recurse into it.
            copy_model(child, dst_child)
        # A Chain is also a Link, so its direct parameters (if any) are
        # handled by this branch too.
        if isinstance(child, chainer.Link):
            # First pass: verify every parameter agrees in name and shape.
            match = True
            for a, b in zip(child.namedparams(), dst_child.namedparams()):
                if a[0] != b[0]:
                    match = False
                    break
                if a[1].data.shape != b[1].data.shape:
                    match = False
                    break
            if not match:
                print('Ignore %s because of parameter mismatch' % child.name)
                continue
            # Second pass: point dst's parameters at src's arrays.
            for a, b in zip(child.namedparams(), dst_child.namedparams()):
                b[1].data = a[1].data
            print('Copy %s' % child.name)
def main():
    """Fine-tune a Caffe-pretrained AlexNet with Chainer.

    Loads pretrained weights from ``alexnet.pkl``, freezes the
    convolutional layers via the DelGradient hook, and trains the
    remaining layers with Adam on an image-list dataset, reporting
    mean-squared-error loss.
    """
    # Fixed: the description previously said "CIFAR example", which this
    # script is not — it fine-tunes AlexNet on an image-list dataset.
    parser = argparse.ArgumentParser(description='Chainer AlexNet fine-tuning example:')
    parser.add_argument('--train', default='train.txt', type=str, help='File name of train data')
    parser.add_argument('--test', default='validation.txt', type=str, help='File name of validation data')
    parser.add_argument('--root', '-R', default='.', help='Root directory path of image files')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--mean', default=None, help='mean file (computed by compute_mean.py)')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports the loss at every iteration, which is used by the
    # PrintReport extension below; accuracy is disabled since the task is
    # regression (mean squared error).
    train = image_dataset.ImageDataset(args.train, args.root, max_size=128, mean=args.mean)
    test = image_dataset.ImageDataset(args.test, args.root, max_size=128, mean=args.mean)

    model = L.Classifier(alexnet.FromCaffeAlexnet(1), lossfun=F.mean_squared_error)
    # Fixed: use a context manager so the pickle file handle is closed
    # (the original leaked it via pickle.load(open(...))).
    # NOTE(review): pickle is unsafe on untrusted data; 'alexnet.pkl' is
    # assumed to be a locally produced, trusted file.
    with open('alexnet.pkl', 'rb') as f:
        original_model = pickle.load(f)
    copy_model(original_model, model.predictor)
    model.compute_accuracy = False

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))
    # Freeze all convolutional layers; only the fully connected head trains.
    optimizer.add_hook(DelGradient(["conv1", "conv2", "conv3", "conv4", "conv5"]))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(TestModeEvaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take snapshots of the trainer state and the model at the final epoch.
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout.
    # "main" refers to the target link of the "main" optimizer;
    # "validation" is the default name of the Evaluator extension.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume the training from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
# Run training only when executed as a script, not when imported.
if __name__ == '__main__':
    main()