N = 60000
x_train, x_test = np.split(mnist['data'], [N])
y_train, y_test = np.split(mnist['target'], [N])
N_test = y_test.size

# Prepare multi-layer perceptron model, defined in net.py
if args.net == 'simple':
    model = L.Classifier(net.MnistMLP(784, n_units, 10))
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()
    xp = np if args.gpu < 0 else cuda.cupy
elif args.net == 'parallel':
    cuda.check_cuda_available()
    model = L.Classifier(net.MnistMLPParallel(784, n_units, 10))
    xp = cuda.cupy

# Setup optimizer
optimizer = optimizers.Adam()
optimizer.setup(model)

# Init/Resume
if args.initmodel:
    print('Load model from', args.initmodel)
    serializers.load_npz(args.initmodel, model)
if args.resume:
    print('Load optimizer state from', args.resume)
    serializers.load_npz(args.resume, optimizer)

# Learning loop
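# The learning loop itself is not included in the snippet above. A minimal
# sketch of one training pass, assuming the classic Chainer v1 minibatch
# pattern and that `batchsize` and `n_epoch` come from argparse (both names
# are assumptions, not defined above; `six` and `chainer` are assumed
# imported as in the surrounding script):
for epoch in six.moves.range(1, n_epoch + 1):
    perm = np.random.permutation(N)
    sum_loss = 0
    for i in six.moves.range(0, N, batchsize):
        x = chainer.Variable(xp.asarray(x_train[perm[i:i + batchsize]]))
        t = chainer.Variable(xp.asarray(y_train[perm[i:i + batchsize]]))
        # Passing a loss function to update() recomputes gradients and
        # applies the parameter update in one call.
        optimizer.update(model, x, t)
        sum_loss += float(model.loss.data) * len(t.data)
    print('epoch {}: train mean loss={}'.format(epoch, sum_loss / N))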
# Optionally restrict the test set to samples of class 1:
#temp_1_test = np.where(y_test==1)[0]
#y_test = y_test[temp_1_test]
#x_test = x_test[temp_1_test]
N = x_train.shape[0]
N_test = y_test.size

# Prepare multi-layer perceptron model, defined in net.py
# (61 input features, 3 output classes)
if args.net == 'simple':
    model = L.Classifier(net.MnistMLP(61, n_units, 3))
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()
    xp = np if args.gpu < 0 else cuda.cupy
elif args.net == 'parallel':
    cuda.check_cuda_available()
    model = L.Classifier(net.MnistMLPParallel(61, n_units, 3))
    xp = cuda.cupy

# Setup optimizer
# TODO: arguments could also be passed to each optimizer, see
# https://github.com/mitmul/chainer-cifar10/blob/master/train.py#L62
opt_name = getattr(args, 'opt', None)
if opt_name == 'MomentumSGD':
    optimizer = optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
elif opt_name == 'AdaGrad':
    optimizer = optimizers.AdaGrad(lr=args.lr)
elif opt_name == 'Adam':
    optimizer = optimizers.Adam(alpha=args.alpha)
else:
    # Fall back to Adam with default hyperparameters when no recognized
    # optimizer name is given.
    optimizer = optimizers.Adam()
optimizer.setup(model)
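# A sketch of the command-line flags the optimizer selection above assumes.
# The flag names are inferred from the attribute accesses (`args.opt`,
# `args.lr`, `args.alpha`); the defaults here are illustrative assumptions,
# and `parser` is assumed to be the argparse.ArgumentParser created earlier
# in the script:
parser.add_argument('--opt', default='Adam',
                    choices=('MomentumSGD', 'AdaGrad', 'Adam'),
                    help='optimizer to train with')
parser.add_argument('--lr', type=float, default=0.01,
                    help='learning rate for MomentumSGD / AdaGrad')
parser.add_argument('--alpha', type=float, default=0.001,
                    help='step-size parameter alpha for Adam')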
def train(trainL, trainA, testL, testA):
    trainLoss = []
    trainAccuracy = []
    testLoss = []
    testAccuracy = []
    l1w = []  # per-layer weight snapshots; unused in this snippet
    l2w = []
    l3w = []

    print('load MNIST dataset')
    mnist = data.load_mnist_data()
    mnist['data'] = mnist['data'].astype(np.float32)
    mnist['data'] /= 255
    mnist['target'] = mnist['target'].astype(np.int32)

    N = 1000
    lsizes = [784, 50, 50, 10]
    x_train, x_test = np.split(mnist['data'], [N])
    y_train, y_test = np.split(mnist['target'], [N])
    N_test = y_test.size

    # Prepare multi-layer perceptron model, defined in net.py
    if args.net == 'simple':
        #model = net.MnistMLP(lsizes)
        model = net.MnistMLP(layer_sizes=lsizes)
        if args.gpu >= 0:
            cuda.get_device(args.gpu).use()
            model.to_gpu()
        xp = np if args.gpu < 0 else cuda.cupy
    elif args.net == 'parallel':
        cuda.check_cuda_available()
        model = L.Classifier(net.MnistMLPParallel(784, n_units, 10))
        xp = cuda.cupy

    # Setup optimizer
    optimizer = optimizers.Adam()
    optimizer.setup(model)

    # Init/Resume
    if args.initmodel:
        print('Load model from', args.initmodel)
        serializers.load_npz(args.initmodel, model)
    if args.resume:
        print('Load optimizer state from', args.resume)
        serializers.load_npz(args.resume, optimizer)

    # Pretrain loop: train each layer j greedily, then fine-tune the whole net
    print("start pretrain")
    epo = p_epoch
    for j in six.moves.range(1, len(lsizes)):
        if j == len(lsizes) - 1:
            model.setfinetuning()
            print("start finetuning")
            epo = n_epoch
        for epoch in six.moves.range(1, epo + 1):
            print('layer ', j, 'p_epoch ', epoch)
            perm = np.random.permutation(N)
            sum_accuracy = 0
            sum_loss = 0
            for i in six.moves.range(0, N, batchsize):
                x = chainer.Variable(xp.asarray(x_train[perm[i:i + batchsize]]))
                t = chainer.Variable(xp.asarray(y_train[perm[i:i + batchsize]]))
                # The layer index j tells the model which layer to train
                optimizer.update(model, x, t, j)
                sum_loss += float(model.loss.data) * len(t.data)
                if not model.pretrain:
                    sum_accuracy += float(model.accuracy.data) * len(t.data)
            if model.pretrain:
                print('Pretrain: train mean loss={}'.format(sum_loss / N))
            else:
                print('Finetune: train mean loss={}, accuracy={}'.format(
                    sum_loss / N, sum_accuracy / N))
                trainLoss.append(sum_loss / N)
                trainAccuracy.append(sum_accuracy / N)

            # evaluation
            sum_accuracy = 0
            sum_loss = 0
            model.train = False
            for i in six.moves.range(0, N_test, batchsize):
                x = chainer.Variable(xp.asarray(x_test[i:i + batchsize]),
                                     volatile='on')
                t = chainer.Variable(xp.asarray(y_test[i:i + batchsize]),
                                     volatile='on')
                loss = model(x, t, j)
                sum_loss += float(loss.data) * len(t.data)
                if not model.pretrain:
                    sum_accuracy += float(model.accuracy.data) * len(t.data)
            if model.pretrain:
                print('Pretrain: test mean loss={}'.format(sum_loss / N_test))
            else:
                print('Finetune: test mean loss={}, accuracy={}'.format(
                    sum_loss / N_test, sum_accuracy / N_test))
                testLoss.append(sum_loss / N_test)
                testAccuracy.append(sum_accuracy / N_test)
            model.train = True

    # Save the loss/accuracy curves, the model, and the optimizer
    savecsv(trainLoss, trainL)
    savecsv(trainAccuracy, trainA)
    savecsv(testLoss, testL)
    savecsv(testAccuracy, testA)
    print('save the model')
    serializers.save_npz('mlp.model', model)
    print('save the optimizer')
    serializers.save_npz('mlp.state', optimizer)
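# `savecsv` is called above but not defined in this snippet. A minimal
# sketch of the assumed helper (one value per line, so the loss/accuracy
# curves can be plotted later); it must be defined before train() is called:
def savecsv(values, path):
    with open(path, 'w') as f:
        for v in values:
            f.write('{}\n'.format(v))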
# Split the dataset into data_train (80%) and data_test (20%)
N = 320
data_train, data_test = np.split(dataset, [N])
target_train, target_test = np.split(targetset, [N])
N_test = target_test.size

# Prepare multi-layer perceptron model, defined in net.py
# (16128 input features, 2 output classes)
if args.net == 'simple':
    model = L.Classifier(net.MnistMLP(16128, n_units, 2))
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()
    xp = np if args.gpu < 0 else cuda.cupy
elif args.net == 'parallel':
    cuda.check_cuda_available()
    model = L.Classifier(net.MnistMLPParallel(16128, n_units, 2))
    xp = cuda.cupy

# Setup optimizer
optimizer = optimizers.Adam()
optimizer.setup(model)

# Init/Resume (HDF5 snapshots)
if args.initmodel:
    print('Load model from', args.initmodel)
    serializers.load_hdf5(args.initmodel, model)
if args.resume:
    print('Load optimizer state from', args.resume)
    serializers.load_hdf5(args.resume, optimizer)

# Learning loop
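# As in the first snippet, the learning loop body is not shown here. A sketch
# of the evaluation pass, assuming the Chainer v1 idiom of volatile Variables
# for inference and an argparse-provided `batchsize` (both assumptions):
sum_loss, sum_accuracy = 0, 0
for i in six.moves.range(0, N_test, batchsize):
    x = chainer.Variable(xp.asarray(data_test[i:i + batchsize]),
                         volatile='on')
    t = chainer.Variable(xp.asarray(target_test[i:i + batchsize]),
                         volatile='on')
    loss = model(x, t)  # L.Classifier also fills model.accuracy
    sum_loss += float(loss.data) * len(t.data)
    sum_accuracy += float(model.accuracy.data) * len(t.data)
print('test mean loss={}, accuracy={}'.format(
    sum_loss / N_test, sum_accuracy / N_test))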