def test_report(self):
    """Run the trainer with the statistics extension and count the reports.

    The number of entries left in the trainer's observation must equal
    ``self.expect``.
    """
    trainer = self.trainer
    stats_ext = extensions.ParameterStatistics(
        self.links, statistics=self.statistics)
    trainer.extend(stats_ext)
    trainer.run()

    n_reported = len(trainer.observation)
    self.assertEqual(n_reported, self.expect)
def main(gpu_id=-1, bs=32, epoch=20, out='./result', resume=''):
    """Train a ShallowConv classifier on MNIST and log parameter statistics.

    Args:
        gpu_id: GPU device id; a negative value keeps everything on the CPU.
        bs: Mini-batch size used by both the training and the test iterator.
        epoch: Number of epochs after which the trainer stops.
        out: Output directory handed to the trainer.
        resume: Path of a trainer snapshot (npz) to load before running.
            An empty string skips loading.
    """
    net = ShallowConv()
    model = L.Classifier(net)

    if gpu_id >= 0:
        # get_device_from_id() only returns the device object. It has to be
        # made current with use(), otherwise to_gpu() copies the model to the
        # default device instead of the requested one.
        chainer.cuda.get_device_from_id(gpu_id).use()
        model.to_gpu()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist(ndim=3)
    train_iter = chainer.iterators.SerialIterator(train, bs)
    # Single, ordered pass over the test split for evaluation.
    test_iter = chainer.iterators.SerialIterator(
        test, bs, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)

    # Statistics are collected for the wrapped network only, not for the
    # Classifier wrapper itself.
    trainer.extend(extensions.ParameterStatistics(model.predictor))
    trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id))
    trainer.extend(extensions.LogReport(log_name='parameter_statistics'))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())

    if resume:
        chainer.serializers.load_npz(resume, trainer)

    trainer.run()
def test_report_key_pattern(self):
    """Check the shape of every key reported with the default settings.

    A key must consist of at least two slash-terminated segments, followed
    by ``data`` or ``grad``, followed by a non-empty tail that does not end
    with a slash.
    """
    trainer = self.trainer
    trainer.extend(extensions.ParameterStatistics(self.links))
    trainer.run()

    key_regex = re.compile(r'^(.+/){2,}(data|grad)/.+[^/]$')
    for key in six.iterkeys(trainer.observation):
        self.assertTrue(key_regex.match(key))
def test_custom_function(self):
    """Every value reported with the custom statistics equals ``self.expect``."""
    trainer = self.trainer
    custom_ext = extensions.ParameterStatistics(
        self.links, statistics=self.statistics)
    trainer.extend(custom_ext)
    trainer.run()

    for reported in six.itervalues(trainer.observation):
        self.assertEqual(reported, self.expect)
def test_report_late_register(self):
    """Statistics registered after construction are reported as well.

    The extension starts with an empty statistics dict. Each function from
    ``self.statistics`` is then added through ``register_statistics`` before
    the trainer runs.
    """
    late_ext = extensions.ParameterStatistics(self.links, statistics={})
    for stat_name, stat_func in six.iteritems(self.statistics):
        late_ext.register_statistics(stat_name, stat_func)

    trainer = self.trainer
    trainer.extend(late_ext)
    trainer.run()

    n_reported = len(trainer.observation)
    self.assertEqual(n_reported, self.expect)
def test_report_key_prefix(self):
    """With ``prefix='prefix'`` every reported key starts with that prefix."""
    prefixed_ext = extensions.ParameterStatistics(
        self.links, statistics=self.statistics, prefix='prefix')

    trainer = self.trainer
    trainer.extend(prefixed_ext)
    trainer.run()

    for key in six.iterkeys(trainer.observation):
        self.assertTrue(key.startswith('prefix'))
def create_extension(self, skip_statistics=False):
    """Build a ParameterStatistics extension from the fixture attributes.

    Args:
        skip_statistics: When true, ``statistics=None`` is passed instead of
            ``self.statistics``.

    Returns:
        The extension created for ``self.links``.
    """
    statistics = None if skip_statistics else self.statistics
    return extensions.ParameterStatistics(
        self.links,
        statistics=statistics,
        report_params=self.report_params,
        report_grads=self.report_grads,
        prefix=self.prefix,
        skip_nan_params=True)  # avoid warnings when grads are nan
def test_skip_params(self):
    """With ``report_params=False`` only gradient entries are reported."""
    grads_only_ext = extensions.ParameterStatistics(
        self.links, statistics=self.statistics, report_params=False)

    trainer = self.trainer
    trainer.extend(grads_only_ext)
    trainer.run()

    for key in six.iterkeys(trainer.observation):
        self.assertIn('grad', key)
        self.assertNotIn('data', key)
def train_CNN(network_object, batchsize=128, gpu_id=-1, max_epoch=20,
              train_dataset=None, test_dataset=None, postfix='',
              base_lr=0.01, lr_decay=None, number=11):
    """Train ``network_object`` as a classifier and return the wrapped model.

    Args:
        network_object: The network to train. It must expose a ``conv1``
            child link, whose parameter statistics are recorded.
        batchsize: Mini-batch size for the training and test iterators.
        gpu_id: GPU device id; a negative value trains on the CPU.
        max_epoch: Number of epochs after which training stops.
        train_dataset: Training dataset. When both datasets are ``None``,
            CIFAR-10 is loaded instead.
        test_dataset: Test dataset, given together with ``train_dataset``.
        postfix: Text inserted into the name of the output directory.
        base_lr: Learning rate of the MomentumSGD optimizer.
        lr_decay: Trigger for multiplying ``lr`` by 0.1. ``None`` disables
            the decay.
        number: Run identifier. It is converted to ``str`` and embedded in
            the log, snapshot and plot file names.

    Returns:
        The ``L.Classifier`` wrapping ``network_object`` after training.

    Raises:
        ValueError: If only one of ``train_dataset`` and ``test_dataset``
            is given.
    """
    number = str(number)

    # 1. Dataset
    if train_dataset is None and test_dataset is None:
        train, test = cifar.get_cifar10()
    elif train_dataset is None or test_dataset is None:
        # A half-specified pair used to reach the iterators as None and fail
        # there with an unrelated error; report the real cause instead.
        raise ValueError(
            'train_dataset and test_dataset must be given together')
    else:
        train, test = train_dataset, test_dataset

    if gpu_id >= 0:
        network_object.to_gpu(gpu_id)

    # 2. Iterator
    train_iter = iterators.MultiprocessIterator(train, batchsize)
    # The positional False, False are repeat and shuffle.
    test_iter = iterators.MultiprocessIterator(test, batchsize, False, False)

    # 3. Model
    net = L.Classifier(network_object)

    # 4. Optimizer
    # base_lr used to be ignored; its default matches the previous behavior.
    optimizer = optimizers.MomentumSGD(lr=base_lr)
    optimizer.setup(net)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

    # 5. Updater
    updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)

    # 6. Trainer
    out_dir = '{}_crack_{}result'.format(
        network_object.__class__.__name__, postfix)
    trainer = training.Trainer(updater, (max_epoch, 'epoch'), out=out_dir)

    # 7. Trainer extensions
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch'),
                                        log_name="log_" + number))
    trainer.extend(
        extensions.snapshot(
            filename=number + 'snapshot_epoch-{.updater.epoch}'),
        trigger=(5, 'epoch'))
    conv1 = net.predictor.conv1
    trainer.extend(extensions.ParameterStatistics(conv1, {'std': np.std}))
    trainer.extend(extensions.observe_lr())
    trainer.extend(extensions.Evaluator(test_iter, net, device=gpu_id),
                   name='val')
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'main/accuracy', 'val/main/loss',
         'val/main/accuracy', 'elapsed_time', 'lr']))
    trainer.extend(extensions.PlotReport(
        ['main/loss', 'val/main/loss'], x_key='epoch',
        file_name='loss' + number + '.png'))
    trainer.extend(extensions.PlotReport(
        ['main/accuracy', 'val/main/accuracy'], x_key='epoch',
        file_name='accuracy' + number + '.png'))
    # ParameterStatistics builds its keys from the observed link's name, so
    # the key is derived from conv1 instead of the hard-coded 'l1', which
    # matched nothing that is reported here.
    std_key = '{}/W/data/std'.format(conv1.name)
    trainer.extend(extensions.PlotReport(
        [std_key], x_key='epoch', file_name='std' + number + '.png'))

    if lr_decay is not None:
        trainer.extend(extensions.ExponentialShift('lr', 0.1),
                       trigger=lr_decay)

    trainer.run()
    del trainer

    return net
def run(aspect, train, word2vec, epoch, frequency, gpu, out, batchsize, lr,
        sparsity_coef, coherent_coef, fix_embedding, dependent, order,
        resume):
    """Train the "Rationalizing Neural Predictions" model for one aspect.

    Please refer README.md for details.

    Args:
        aspect, train, word2vec: Forwarded to the cached ``prepare_data``.
        epoch: Number of epochs after which the trainer stops.
        frequency: Trigger of the evaluation on the development split.
        gpu: GPU device id; a negative value runs on the CPU (with iDeep
            when it is available).
        out: Output directory for the trainer, the final model and the
            vocabulary.
        batchsize: Mini-batch size of both iterators.
        lr: Learning rate. Adam's ``alpha`` is ``lr / batchsize`` and the
            weight decay rate is ``1e-7 * lr``.
        sparsity_coef, coherent_coef, fix_embedding: Forwarded to
            ``RationalizedRegressor``.
        dependent: Selects ``GeneratorDependent`` instead of ``Generator``.
        order: Forwarded to the encoder and the generator.
        resume: Trainer snapshot to load before training; falsy to skip.
    """
    disk_cache = Memory(cachedir='.', verbose=1)
    cached_prepare = disk_cache.cache(prepare_data)
    w2v, vocab, dataset, _, _ = cached_prepare(train, word2vec, aspect)
    n_vocab, n_embed = w2v.shape[0], w2v.shape[1]

    # The last 500 examples form the development split.
    n_train = len(dataset) - 500
    train_dataset, dev_dataset = chainer.datasets.split_dataset(
        dataset, n_train)

    encoder = rationale.models.Encoder(n_embed, order, 200, 2, dropout=0.1)
    if dependent:
        generator_cls = rationale.models.GeneratorDependent
    else:
        generator_cls = rationale.models.Generator
    # Original impl. uses two layers to model bi-directional LSTM
    generator = generator_cls(n_embed, order, 200, dropout=0.1)
    model = rationale.models.RationalizedRegressor(
        generator, encoder, n_vocab, n_embed,
        initialEmb=w2v,
        dropout_emb=0.1,
        fix_embedding=fix_embedding,
        sparsity_coef=sparsity_coef,
        coherent_coef=coherent_coef)

    if gpu >= 0:
        logger.info('Using GPU (%d)' % gpu)
        # Make the requested GPU current, then copy the model onto it.
        chainer.cuda.get_device_from_id(gpu).use()
        model.to_gpu()
    elif chainer.backends.intel64.is_ideep_available():
        logger.info('Using CPU with iDeep')
        # iDeep was able to accelerate training on CPU by about 30% on laptop
        model.to_intel64()
        chainer.global_config.use_ideep = 'auto'
    else:
        logger.info('Using CPU without acceleration')

    # Impl. by author uses mean as loss. Dividing lr by batchsize has a
    # similar effect.
    optimizer = chainer.optimizers.Adam(alpha=lr / batchsize)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(3.0))
    # Impl. by author implements weight decay as an L2 loss, which ends up
    # multiplied by the learning rate; the rate is scaled the same way here.
    l2_reg = 1e-7
    optimizer.add_hook(chainer.optimizer.WeightDecay(l2_reg * lr))

    # Trainer driven by the training split.
    train_iter = chainer.iterators.SerialIterator(train_dataset, batchsize)
    updater = training.StandardUpdater(
        train_iter, optimizer, device=gpu,
        converter=rationale.training.convert)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)

    logger.info("train: {}, dev: {}".format(len(train_dataset),
                                            len(dev_dataset)))

    # Evaluation on the development split.
    dev_iter = chainer.iterators.SerialIterator(
        dev_dataset, batchsize, repeat=False, shuffle=False)
    trainer.extend(
        extensions.Evaluator(dev_iter, model, device=gpu,
                             converter=rationale.training.convert),
        trigger=frequency)

    inv_vocab = {index: token for token, index in vocab.items()}

    @chainer.training.make_extension()
    def monitor_rationale(_):
        # Print one random development example with unselected tokens masked.
        sample = dev_dataset[np.random.choice(len(dev_dataset))]
        batch = rationale.training.convert([sample], gpu)
        z = chainer.cuda.to_cpu(model.predict_rationale(batch['xs'])[0])
        token_ids = chainer.cuda.to_cpu(batch['xs'][0])
        source = [inv_vocab[int(token_id)] for token_id in token_ids]
        result = []
        for token, zi in zip(source, z):
            result.append(token if zi > 0.5 else '_')
        print('# source : ' + ' '.join(source))
        print('# result : ' + ' '.join(result))

    every_10_iterations = (10, 'iteration')
    cost_key = 'validation/main/generator/cost'

    trainer.extend(monitor_rationale, trigger=every_10_iterations)
    trainer.extend(SaveRestore(filename='trainer.npz'),
                   trigger=MinValueTrigger(cost_key),
                   priority=96)
    trainer.extend(ConditionalRestart(monitor=cost_key, mode='min',
                                      patients=2))

    if gpu < 0:
        # ParameterStatistics does not work with GPU as of chainer 2.x
        # https://github.com/chainer/chainer/issues/3027
        trainer.extend(
            extensions.ParameterStatistics(model, trigger=(100, 'iteration')),
            priority=99)

    # Log written at every iteration.
    trainer.extend(extensions.LogReport(trigger=(1, 'iteration')),
                   priority=98)

    # The console report uses its own LogReport, refreshed every 10
    # iterations.
    report_keys = [
        'epoch',
        'main/encoder/mse',
        'main/generator/cost',
        'validation/main/encoder/mse',
        'validation/main/generator/cost',
    ]
    trainer.extend(
        extensions.PrintReport(
            report_keys,
            log_report=extensions.LogReport(trigger=every_10_iterations)),
        trigger=every_10_iterations,
        priority=97)

    if resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(resume, trainer)

    logger.info("Started training")
    trainer.run()

    # Save the final model (without the trainer) and the vocabulary.
    model_path = os.path.join(out, 'trained_model.npz')
    chainer.serializers.save_npz(model_path, model)
    vocab_path = os.path.join(out, 'vocab.json')
    with open(vocab_path, 'w') as fout:
        json.dump(vocab, fout)
# Training script for a GRU-based sentence classifier. It relies on `words`,
# `dataset` and `N`, which are defined outside this part of the file.

# Hyper-parameters
EPOCH_NUM = 10
EMBED_SIZE = 100
HIDDEN_SIZE = 200
BATCH_SIZE = 5
OUT_SIZE = 2

# Wrap the network in a Classifier and optimize it with Adam.
model = L.Classifier(
    GRU_SentenceClassifier(vocab_size=len(words),
                           embed_size=EMBED_SIZE,
                           hidden_size=HIDDEN_SIZE,
                           out_size=OUT_SIZE))
optimizer = optimizers.Adam()
optimizer.setup(model)

# Random split: the first part gets N - 10 examples, the rest go to `test`.
# NOTE(review): presumably N == len(dataset), i.e. 10 held-out examples -
# confirm where N is defined.
train, test = chainer.datasets.split_dataset_random(dataset, N - 10)
train_iter = chainer.iterators.SerialIterator(train, BATCH_SIZE)
test_iter = chainer.iterators.SerialIterator(test, BATCH_SIZE, repeat=False)

# device=-1 keeps the updater and the evaluator on the CPU.
updater = training.StandardUpdater(train_iter, optimizer, device=-1)
trainer = training.Trainer(updater, (EPOCH_NUM, "epoch"), out="result")
trainer.extend(extensions.Evaluator(test_iter, model, device=-1))
trainer.extend(extensions.LogReport(trigger=(1, "epoch")))
trainer.extend(
    extensions.PrintReport([
        "epoch", "main/loss", "validation/main/loss", "main/accuracy",
        "validation/main/accuracy", "elapsed_time"
    ]))  # epoch, training loss, test loss, training accuracy, test accuracy, elapsed time
trainer.extend(extensions.ProgressBar())
trainer.extend(extensions.ParameterStatistics(model))
trainer.extend(extensions.dump_graph('main/loss'))
trainer.run()
def train_model(model, train_iter, valid_iter, epoch=10, out='__result__',
                init_file=None, fix_trained=False, alpha=0.001, init_all=True):
    """Train ``model`` with Adam and a standard set of trainer extensions.

    Args:
        model: The link to optimize. The code reads ``model.predictor`` and
            ``model.name``, and slices ``model[:-1]`` when ``fix_trained``
            is set.
        train_iter: Iterator passed to the updater.
        valid_iter: Iterator passed to the evaluator registered as ``val``.
        epoch: Number of epochs after which the trainer stops.
        out: Output directory handed to the trainer.
        init_file: Optional npz snapshot to load before training.
        fix_trained: When true, ``disable_update()`` is called on every
            element of ``model[:-1]``.
        alpha: Adam's ``alpha`` hyper-parameter.
        init_all: When true, ``init_file`` is loaded into the whole trainer;
            otherwise only the entries under ``updater/model:main/`` are
            loaded into the model.
    """
    learner = model

    # Choose the optimization method
    optimizer = O.Adam(alpha=alpha).setup(learner)

    if fix_trained:
        # Disable updates for everything except the last element.
        for m in model[:-1]:
            m.disable_update()

    # Prepare the updater (updates the parameters)
    updater = T.StandardUpdater(train_iter, optimizer, device=C_.DEVICE)

    # Prepare the trainer
    trainer = T.Trainer(updater, stop_trigger=(epoch, 'epoch'), out=out)

    # Add extensions to the trainer
    ## Validation
    trainer.extend(E.Evaluator(valid_iter, learner, device=C_.DEVICE),
                   name='val')

    ## Record statistics of the model parameters
    trainer.extend(E.ParameterStatistics(learner.predictor, {'std': np.std},
                                         prefix='links'))

    ## Record the learning rate
    trainer.extend(E.observe_lr())

    ## Print the training progress to the screen
    trainer.extend(
        E.PrintReport(
            ['epoch', 'main/loss', 'val/main/loss', 'elapsed_time', 'lr']))

    ## Write the log (also contains the results of the other extensions)
    trainer.extend(E.LogReport(log_name='log.json'))

    ## Plot the training progress to image files
    # NOTE(review): the plots are only registered when C_.OS_IS_WIN is
    # truthy - confirm that this restriction is intended.
    if C_.OS_IS_WIN:
        def ex_pname(link):
            # Collect slash-joined parameter names below `link`, recursing
            # into child links and prefixing each level with the link name.
            ls = list(link.links())[1:]
            if not ls:
                names = (p.name for p in link.params())
            else:
                names = chain(*map(ex_pname, ls))
            return [f'{link.name}/{n}' for n in names]

        def register(keys, file_name):
            # Register one PlotReport for `keys`, written to `file_name`.
            trainer.extend(E.PlotReport(keys,# x_key='epoch',
                                        file_name=file_name, marker=None))

        register('lr', file_name='lr.png')
        register(['main/loss', 'val/main/loss'], file_name='loss.png')

        if 'vae' in learner.name:
            register(['main/reconstr', 'val/main/reconstr'],
                     file_name='reconstr.png')
            register(['main/kl_penalty', 'val/main/kl_penalty'],
                     file_name='kl_penalty.png')

        register(['main/mse_vel', 'val/main/mse_vel'], file_name='mse_vel.png')
        register(['main/mse_vor', 'val/main/mse_vor'], file_name='mse_vor.png')

        # One std plot per (link, data/grad, name fragment) combination.
        for link in learner.predictor:
            param_names = ex_pname(link)
            for d in ('data', 'grad'):
                observe_keys_std = [f'links/predictor/{key}/{d}/std'
                                    for key in param_names]
                for l in ('enc', 'dec', 'bne', 'bnd'):
                    file_name = f'std_{d}_{l}_{link.name}.png'
                    # Keep the keys containing the fragment `l`. The lambda
                    # is consumed immediately, so capturing the loop
                    # variable is safe here.
                    f_ = lambda s: l in s# or f'bn{l[0]}' in s
                    keys = list(filter(f_, observe_keys_std))
                    register(keys, file_name=file_name)

    ## Dump the network structure in the dot language
    ## Visualization command: ```dot -Tpng cg.dot -o [output file]```
    trainer.extend(E.dump_graph('main/loss'))

    ## Serialize the trainer object and save it in the output directory
    trainer.extend(
        E.snapshot(filename='snapshot_epoch-{.updater.epoch}.model'))

    ## Progress bar
    if C_.SHOW_PROGRESSBAR:
        trainer.extend(E.ProgressBar())

    if init_file:
        print('loading snapshot:', init_file)
        try:
            if init_all:
                chainer.serializers.load_npz(init_file, trainer)
            else:
                chainer.serializers.load_npz(init_file, learner,
                                             path='updater/model:main/')
        except KeyError:
            # The handler only re-raises, so a KeyError propagates unchanged.
            raise

    # Custom extensions
    # trainer.extend(plot_loss_ex, trigger=(1, 'epoch'))
    # trainer.extend(lr_drop_ex(alpha), trigger=(1, 'epoch'))
    trainer.extend(pause_ex, trigger=(1, 'iteration'))

    # Start training
    try:
        trainer.run()
    except:
        # Any exception (including KeyboardInterrupt) is announced and then
        # re-raised to the caller.
        print('trainer except')
        raise
    finally:
        print('trainer end')
def main():
    """Command-line entry point for training a document classifier.

    Parses the options, loads the dataset (or builds it and caches it with
    pickle), selects the network with ``--case`` and the optimizer with
    ``--opt``, and runs a Chainer trainer whose output directory is derived
    from both choices.
    """
    set_random_seed(0)

    parser = argparse.ArgumentParser(
        description='Document Classification Example')
    add = parser.add_argument
    add('--batchsize', '-b', type=int, default=64,
        help='Number of documents in each mini-batch')
    add('--epoch', '-e', type=int, default=30,
        help='Number of training epochs')
    add('--gpu', '-g', type=int, default=-1,
        help='GPU ID (negative value indicates CPU)')
    add('--out', '-o', default='result',
        help='Directory to output the result')
    add('--unit', '-u', type=int, default=200,
        help='Number of units')
    add('--vocab', '-v', type=int, default=50000,
        help='Vocabulary size')
    add('--layer', '-l', type=int, default=1,
        help='Number of layers of LSMT')
    add('--dropout', '-d', type=float, default=0.4,
        help='Dropout rate')
    add('--gradclip', type=float, default=5,
        help='Gradient clipping threshold')
    add('--train_file', '-train', default='data/train.seg.csv',
        help='Trainig data file.')
    add('--test_file', '-test', default='data/test.seg.csv',
        help='Test data file.')
    add('--model', '-m',
        help='read model parameters from npz file')
    add('--vcb_file', '-vf',
        default='/mnt/gold/users/s18153/prjPyCharm/prjNLP_GPU/data/vocab_train_w_NoReplace.vocab_file',
        help='Vocabulary data file.')
    add('--case', '-c', default='original',
        help='Select NN Architecture.')
    add('--opt', default='sgd',
        help='Select Optimizer.')
    add('--dbg_on', action='store_true',
        help='No save, MiniTrain')
    args = parser.parse_args()
    print(args)

    # Dataset: reuse the pickled dataset when the cache file exists,
    # otherwise build the vocabulary from the training data and cache it.
    if not os.path.exists(args.vcb_file):
        train_val = data.DocDataset(args.train_file, vocab_size=args.vocab)
        with open(args.vcb_file, 'wb') as cache_out:
            pickle.dump(train_val, cache_out)
    else:
        # NOTE: pickle.load executes arbitrary code from the file; only
        # point --vcb_file at trusted files.
        with open(args.vcb_file, 'rb') as cache_in:
            train_val = pickle.load(cache_in)
        if len(train_val.get_vocab()) != args.vocab:
            warnings.warn('vocab size incorrect (not implemented...)')

    # Debug mode trains on a random subset of 1000 examples.
    if args.dbg_on:
        subset_size = 1000
        print('N', subset_size)
        picked = np.random.permutation(range(len(train_val)))[:subset_size]
        train_val = train_val[picked]
        first_size = 800
    else:
        first_size = 4000
    train, valid = split_dataset_random(train_val, first_size, seed=0)

    train_iter = iterators.SerialIterator(train, args.batchsize)
    valid_iter = iterators.SerialIterator(valid, args.batchsize,
                                          repeat=False, shuffle=False)

    # Network selection: case -> (message, result directory, class name).
    print('case', args.case)
    known_cases = {
        'original': ('originalで実行されます', 'result/original', 'DocClassify'),
        'bi': ('biで実行されます', 'result/bi', 'DocClassifyBi'),
        'bi2': ('bi改良版', 'result/bi2', 'DocClassifyBi2'),
        'bi_adam_2layer': ('bi改良版', 'result/bi2', 'DocClassifyBi2'),
    }
    if args.case in known_cases:
        message, result_path, net_name = known_cases[args.case]
        print(message)
    else:
        # Unknown case: warn and fall back to the default network.
        warnings.warn('指定したケースは存在しません。デフォルトで実行します')
        result_path = 'result/sample_result'
        net_name = 'DocClassify'
    net_cls = getattr(nets_B, net_name)
    model = L.Classifier(
        net_cls(n_vocab=args.vocab + 1,
                n_units=args.unit,
                n_layers=args.layer,
                n_out=4,
                dropout=args.dropout))

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Optimizer selection: opt -> (directory suffix, message, use Adam?).
    # Anything unknown runs plain SGD without a suffix.
    known_opts = {
        'sgd': ('_sgd', 'SGD', False),
        'adam': ('_adam', 'Adam', True),
        'bi_adam_2layer': ('_adam_2layer', 'Adam', True),
    }
    suffix, message, use_adam = known_opts.get(
        args.opt, ('', '指定なしのためSGDで実行', False))
    result_path += suffix
    print(message)
    if use_adam:
        optimizer = optimizers.Adam()
    else:
        optimizer = optimizers.SGD(lr=0.01)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))

    updater = training.StandardUpdater(train_iter, optimizer,
                                       converter=convert_seq,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                               out=result_path)

    trainer.extend(extensions.LogReport())
    if not args.dbg_on:
        # Snapshots are skipped in debug mode.
        snapshot = extensions.snapshot(
            filename='snapshot_epoch-{.updater.epoch}')
        trainer.extend(snapshot)
    evaluator = extensions.Evaluator(valid_iter, model,
                                     converter=convert_seq,
                                     device=args.gpu)
    trainer.extend(evaluator, name='val')
    report_keys = ['epoch', 'main/loss', 'main/accuracy',
                   'val/main/loss', 'val/main/accuracy', 'elapsed_time']
    trainer.extend(extensions.PrintReport(report_keys))
    trainer.extend(
        extensions.ParameterStatistics(model.predictor.doc_enc,
                                       {'std': np.std}))
    for plot_keys, plot_file in (
            (['main/loss', 'val/main/loss'], 'loss.png'),
            (['main/accuracy', 'val/main/accuracy'], 'accuracy.png')):
        trainer.extend(
            extensions.PlotReport(plot_keys, x_key='epoch',
                                  file_name=plot_file))
    trainer.extend(extensions.dump_graph('main/loss'))

    if args.model:
        serializers.load_npz(args.model, trainer)

    trainer.run()
def main():
    """Set up and run the generator/discriminator image-translation training.

    Reads the options from ``arguments()``, builds the datasets, iterators,
    models and optimizers, registers snapshot, logging, learning-rate
    scheduling and visualisation extensions, and runs the trainer.
    """
    args = arguments()
    # chainer.config.type_check = False
    chainer.config.autotune = True
    chainer.config.dtype = dtypes[args.dtype]
    chainer.print_runtime_info()

    ## dataset preparation
    if args.imgtype == "dcm":
        from dataset_dicom import Dataset
    else:
        from dataset import Dataset
    train_d = Dataset(args.train, args.root, args.from_col, args.to_col,
                      crop=(args.crop_height, args.crop_width),
                      random=args.random_translate, grey=args.grey)
    test_d = Dataset(args.val, args.root, args.from_col, args.to_col,
                     crop=(args.crop_height, args.crop_width),
                     random=args.random_translate, grey=args.grey)

    # setup training/validation data iterators
    train_iter = chainer.iterators.SerialIterator(train_d, args.batch_size)
    test_iter = chainer.iterators.SerialIterator(test_d, args.nvis,
                                                 shuffle=False)
    # same as training data; used for validation
    test_iter_gt = chainer.iterators.SerialIterator(train_d, args.nvis,
                                                    shuffle=False)

    # Channel counts are taken from the first training pair.
    args.ch = len(train_d[0][0])
    args.out_ch = len(train_d[0][1])
    print("Input channels {}, Output channels {}".format(args.ch, args.out_ch))

    ## Set up models
    gen = net.Generator(args)
    dis = net.Discriminator(args)

    ## load learnt models
    # Optimiser snapshot files are keyed by the optimizer they belong to, so
    # that giving only one of the two model files can never load its
    # optimiser state into the other optimizer.
    optimiser_files = {}
    if args.model_gen:
        serializers.load_npz(args.model_gen, gen)
        print('model loaded: {}'.format(args.model_gen))
        optimiser_files['opt_g'] = args.model_gen.replace('gen_', 'opt_gen_')
    if args.model_dis:
        serializers.load_npz(args.model_dis, dis)
        print('model loaded: {}'.format(args.model_dis))
        optimiser_files['opt_d'] = args.model_dis.replace('dis_', 'opt_dis_')

    ## send models to GPU
    if args.gpu >= 0:
        # get_device() is deprecated in favour of get_device_from_id().
        chainer.cuda.get_device_from_id(args.gpu).use()
        gen.to_gpu()
        dis.to_gpu()

    # Setup optimisers
    def make_optimizer(model, lr, opttype='Adam'):
        """Create the optimizer ``opttype`` for ``model`` with weight decay."""
        optimizer = optim[opttype](lr)
        decay = args.weight_decay > 0
        builtin_decay = decay and opttype in ['Adam', 'AdaBound', 'Eve']
        if builtin_decay:
            # These optimizers carry weight decay as a hyper-parameter.
            optimizer.weight_decay_rate = args.weight_decay
        # Hooks must be added after setup(): an optimizer without a target
        # rejects add_hook(), and setup() starts from an empty hook list.
        optimizer.setup(model)
        if decay and not builtin_decay:
            if args.weight_decay_norm == 'l2':
                optimizer.add_hook(
                    chainer.optimizer.WeightDecay(args.weight_decay))
            else:
                optimizer.add_hook(
                    chainer.optimizer_hooks.Lasso(args.weight_decay))
        return optimizer

    opt_gen = make_optimizer(gen, args.learning_rate, args.optimizer)
    opt_dis = make_optimizer(dis, args.learning_rate, args.optimizer)
    optimizers = {'opt_g': opt_gen, 'opt_d': opt_dis}

    ## resume optimisers from file
    if args.load_optimizer:
        for key in ('opt_g', 'opt_d'):
            path = optimiser_files.get(key)
            if not path:
                continue
            try:
                serializers.load_npz(path, optimizers[key])
                print('optimiser loaded: {}'.format(path))
            except Exception:
                # Best effort: training continues with a fresh optimizer.
                print("couldn't load {}".format(path))

    # Set up trainer
    updater = pixupdater(
        models=(gen, dis),
        iterator={
            'main': train_iter,
            'test': test_iter,
            'test_gt': test_iter_gt
        },
        optimizer={
            'gen': opt_gen,
            'dis': opt_dis
        },
        # converter=convert.ConcatWithAsyncTransfer(),
        params={'args': args},
        device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    ## save learnt results at an interval
    if args.snapinterval < 0:
        # A negative interval means: snapshot once, at the final epoch.
        args.snapinterval = args.epoch
    snapshot_interval = (args.snapinterval, 'epoch')
    display_interval = (args.display_interval, 'iteration')

    trainer.extend(extensions.snapshot_object(gen, 'gen_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(opt_gen,
                                              'opt_gen_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)
    if args.lambda_dis > 0:
        trainer.extend(extensions.snapshot_object(dis,
                                                  'dis_{.updater.epoch}.npz'),
                       trigger=snapshot_interval)
        trainer.extend(
            extensions.dump_graph('dis/loss_real', out_name='dis.dot'))
        trainer.extend(extensions.snapshot_object(
            opt_dis, 'opt_dis_{.updater.epoch}.npz'),
            trigger=snapshot_interval)

    if args.lambda_rec_l1 > 0:
        trainer.extend(extensions.dump_graph('gen/loss_L1',
                                             out_name='gen.dot'))
    elif args.lambda_rec_l2 > 0:
        trainer.extend(extensions.dump_graph('gen/loss_L2',
                                             out_name='gen.dot'))

    ## log outputs
    log_keys = ['epoch', 'iteration', 'lr']
    log_keys_gen = [
        'gen/loss_L1', 'gen/loss_L2', 'gen/loss_dis', 'myval/loss_L2',
        'gen/loss_tv'
    ]
    log_keys_dis = ['dis/loss_real', 'dis/loss_fake', 'dis/loss_mispair']
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(extensions.PrintReport(log_keys + log_keys_gen +
                                          log_keys_dis),
                   trigger=display_interval)
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(log_keys_gen, 'iteration',
                                  trigger=display_interval,
                                  file_name='loss_gen.png'))
        trainer.extend(
            extensions.PlotReport(log_keys_dis, 'iteration',
                                  trigger=display_interval,
                                  file_name='loss_dis.png'))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.ParameterStatistics(gen))

    # learning rate scheduling: the rate attribute is called "lr" for the
    # SGD family and "alpha" for the Adam family; both are multiplied by
    # 0.33 every fifth of the total number of epochs.
    if args.optimizer in ['SGD', 'Momentum', 'AdaGrad', 'RMSprop']:
        trainer.extend(extensions.observe_lr(optimizer_name='gen'),
                       trigger=display_interval)
        trainer.extend(extensions.ExponentialShift('lr', 0.33,
                                                   optimizer=opt_gen),
                       trigger=(args.epoch / 5, 'epoch'))
        trainer.extend(extensions.ExponentialShift('lr', 0.33,
                                                   optimizer=opt_dis),
                       trigger=(args.epoch / 5, 'epoch'))
    elif args.optimizer in ['Adam', 'AdaBound', 'Eve']:
        trainer.extend(extensions.observe_lr(optimizer_name='gen'),
                       trigger=display_interval)
        trainer.extend(extensions.ExponentialShift("alpha", 0.33,
                                                   optimizer=opt_gen),
                       trigger=(args.epoch / 5, 'epoch'))
        trainer.extend(extensions.ExponentialShift("alpha", 0.33,
                                                   optimizer=opt_dis),
                       trigger=(args.epoch / 5, 'epoch'))

    # evaluation
    vis_folder = os.path.join(args.out, "vis")
    os.makedirs(vis_folder, exist_ok=True)
    if not args.vis_freq:
        args.vis_freq = len(train_d) // 2
    trainer.extend(VisEvaluator({
        "test": test_iter,
        "train": test_iter_gt
    }, {"gen": gen},
        params={'vis_out': vis_folder},
        device=args.gpu),
        trigger=(args.vis_freq, 'iteration'))

    # ChainerUI: removed until ChainerUI updates to be compatible with
    # Chainer 6.0
    # trainer.extend(CommandsExtension())

    # Run the training
    print("trainer start")
    trainer.run()
def main():
    """Train an ImageNet model with the distributed K-FAC optimizer.

    Parses the command line (optionally overridden by a JSON config file),
    creates the communicator, model, datasets, iterators, optimizer and
    trainer, saves the effective configuration and runs the trainer.

    Raises:
        RuntimeError: If CUDA is not available.
    """
    # Check if GPU is available
    if not chainer.cuda.available:
        raise RuntimeError("ImageNet requires GPU support.")

    parser = argparse.ArgumentParser(
        description='Training ResNet50 on ImageNet')
    # Data
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--train_root', default='.')
    parser.add_argument('--val_root', default='.')
    parser.add_argument('--mean', default='mean.npy')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument('--iterator', default='thread')
    # Training Settings
    parser.add_argument('--arch_file', type=str, default='models/resnet50.py')
    parser.add_argument('--arch_name', type=str, default='ResNet50')
    parser.add_argument('--initmodel')
    parser.add_argument('--resume', default='')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--val_batchsize', type=int, default=16)
    parser.add_argument('--acc_iters', type=int, default=1)
    parser.add_argument('--epoch', '-E', type=int, default=36)
    # NOTE: store_true flags that default to True cannot be switched off on
    # the command line; only the config file loaded below can override them.
    parser.add_argument('--normalize_weight', action='store_true', default=True)  # NOQA
    parser.add_argument('--nw_skip_scale_comp', action='store_true', default=False)  # NOQA
    # Hyper-parameters
    parser.add_argument('--lr', type=float, default=8.18e-3)
    parser.add_argument('--lr_plan', default='polynomial')
    parser.add_argument('--epoch_lr_decay_start', type=float, default=1)
    parser.add_argument('--polynomial_decay_p', type=float, default=11)
    parser.add_argument('--polynomial_epoch', type=float, default=53)
    parser.add_argument('--momentum', type=float, default=0.997)
    parser.add_argument('--adjust_momentum', action='store_true', default=True)  # NOQA
    parser.add_argument('--mixup_alpha', type=float, default=0.4)
    parser.add_argument('--running_mixup', action='store_true', default=True)
    parser.add_argument('--re_rate', type=float, default=0.5)
    parser.add_argument('--re_area_rl', type=float, default=0.02)
    parser.add_argument('--re_area_rh', type=float, default=0.25)
    parser.add_argument('--re_aspect_rl', type=float, default=0.3)
    parser.add_argument('--cov_ema_decay', type=float, default=1.0)
    parser.add_argument('--damping', type=float, default=2.5e-4)
    parser.add_argument('--use_tensor_core', action='store_true', default=False)  # NOQA
    parser.add_argument('--communicate_after_forward', action='store_true', default=False)  # NOQA
    # Other
    parser.add_argument('--test', action='store_true', default=False)
    parser.add_argument('--stats', action='store_true', default=False)
    parser.add_argument('--config', type=str, default=None)
    parser.add_argument('--config_out', default='config.json')
    parser.add_argument('--out', '-o', default='result')

    args = parser.parse_args()
    # vars() returns the namespace's own dict, so updating dict_args also
    # changes the attributes of args.
    dict_args = vars(args)

    # ======== Load config file ========
    if args.config is not None:
        with open(args.config) as f:
            _config = json.load(f)
        dict_args.update(_config)

    # ======== Create communicator ========
    comm = chainerkfac.create_communicator('pure_nccl')
    # The rank within the node doubles as the GPU id of this worker.
    device = comm.intra_rank

    # ======== Create model ========
    kwargs = {
        'mixup_alpha': args.mixup_alpha,
        'running_mixup': args.running_mixup,
        're_area_rl': args.re_area_rl,
        're_area_rh': args.re_area_rh,
        're_aspect_rl': args.re_aspect_rl,
        're_rate': args.re_rate,
    }
    arch = get_arch(args.arch_file, args.arch_name)
    model = arch(**kwargs)
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    # ======== Copy model to GPU ========
    try:
        chainer.cuda.get_device_from_id(device).use()  # Make the GPU current
        model.to_gpu()
    except chainer.cuda.cupy.cuda.runtime.CUDARuntimeError as e:
        # Name the failing host and GPU before propagating the error.
        print('[ERROR] Host: {}, GPU ID: {}'.format(socket.gethostname(),
                                                    device),
              file=sys.stderr)
        raise e

    # ======== Create dataset ========
    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    mean = np.load(args.mean)
    if comm.rank == 0:
        train = datasets.read_pairs(args.train)
        val = datasets.read_pairs(args.val)
    else:
        train = None
        val = None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    val = chainermn.scatter_dataset(val, comm)
    train = datasets.CroppingImageDatasetIO(train, args.train_root, mean,
                                            model.insize, model.insize)
    val = datasets.CroppingImageDatasetIO(val, args.val_root, mean,
                                          model.insize, model.insize, False)

    # ======== Create iterator ========
    if args.iterator == 'process':
        # We need to change the start method of multiprocessing module if we
        # are using InfiniBand and MultiprocessIterator. This is because
        # processes often crash when calling fork if they are using Infiniband.
        # (c.f., https://www.open-mpi.org/faq/?category=tuning#fork-warning )
        multiprocessing.set_start_method('forkserver')
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=args.loaderjob)
        val_iter = chainer.iterators.MultiprocessIterator(
            val, args.val_batchsize, n_processes=args.loaderjob,
            repeat=False, shuffle=False)
    elif args.iterator == 'thread':
        train_iter = chainer.iterators.MultithreadIterator(
            train, args.batchsize, n_threads=args.loaderjob)
        val_iter = chainer.iterators.MultithreadIterator(
            val, args.val_batchsize, n_threads=args.loaderjob,
            repeat=False, shuffle=False)
    else:
        # Any other value falls back to the serial iterator.
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        val_iter = chainer.iterators.SerialIterator(val, args.val_batchsize,
                                                    repeat=False,
                                                    shuffle=False)

    # ======== Create optimizer ========
    optimizer = chainerkfac.optimizers.DistributedKFAC(
        comm,
        lr=args.lr,
        momentum=args.momentum,
        cov_ema_decay=args.cov_ema_decay,
        damping=args.damping,
        acc_iters=args.acc_iters,
        adjust_momentum=args.adjust_momentum,
        communicate_after_forward=args.communicate_after_forward,
    )
    optimizer.setup(model)
    optimizer.use_fp32_update()

    if args.normalize_weight:
        # Attach the weight-normalization hook to every parameter that is
        # marked with a truthy `normalize_weight` attribute.
        link = getattr(optimizer, 'target')
        for param in link.params():
            if getattr(param, 'normalize_weight', False):
                param.update_rule.add_hook(
                    NormalizeWeightUR(skip_scale_comp=args.nw_skip_scale_comp))

    if comm.rank == 0:
        print('indices: {}'.format(optimizer.indices))

    # ======== Create updater ========
    updater = training.StandardUpdater(train_iter, optimizer, device=device)

    # ======== Create trainer ========
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # ======== Extend trainer ========
    # --test switches both intervals from once per epoch to every 10
    # iterations.
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    log_interval = (10, 'iteration') if args.test else (1, 'epoch')

    # Create a multi node evaluator from an evaluator.
    evaluator = TestModeEvaluator(val_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)

    # Reduce the learning rate
    if args.lr_plan == 'polynomial':
        epoch_end = max(args.epoch, args.polynomial_epoch)
        trainer.extend(LrPolynomialDecay(args.lr, args.epoch_lr_decay_start,
                                         epoch_end,
                                         p=args.polynomial_decay_p),
                       trigger=(args.acc_iters, 'iteration'))

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'elapsed_time', 'main/loss',
            'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'lr'
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        if args.stats:
            trainer.extend(extensions.ParameterStatistics(model))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    if comm.rank == 0:
        hyperparams = optimizer.hyperparam.get_dict()
        for k, v in hyperparams.items():
            print('{}: {}'.format(k, v))

        # ======== Save configration ========
        # Store the effective arguments and optimizer hyper-parameters as
        # JSON next to the training results.
        os.makedirs(args.out, exist_ok=True)
        my_config = {}
        my_config['args'] = vars(args)
        my_config['hyperparams'] = optimizer.hyperparam.get_dict()

        with open(os.path.join(args.out, args.config_out), 'w') as f:
            r = json.dumps(my_config)
            f.write(r)

        # Copy this file to args.out
        shutil.copy(os.path.realpath(__file__), args.out)

    chainer.cuda.set_max_workspace_size(512 * 1024 * 1024)
    config.autotune = True
    config.cudnn_fast_batch_normalization = True

    trainer.run()
def train(args):
    """Build a Chainer trainer from a YAML config file and run it.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``config`` (path to the YAML file), ``result_dir`` (output
            directory or ``None``), ``gpus`` (sequence of device ids, the
            first one becomes the ``'main'`` device) and ``resume``
            (snapshot path or ``None``).

    Returns:
        ``0`` after ``trainer.run()`` has finished.
    """
    # Use a context manager so the config file handle is closed right after
    # parsing instead of being leaked.
    # NOTE(review): yaml.load without an explicit Loader is unsafe on
    # untrusted input (and newer PyYAML releases require a Loader argument);
    # consider yaml.safe_load if the configs only use plain YAML types.
    with open(args.config) as config_file:
        config = yaml.load(config_file)

    print('==========================================')

    # Set workspace size
    if 'max_workspace_size' in config:
        chainer.cuda.set_max_workspace_size(config['max_workspace_size'])

    # Output version info
    print('chainer version: {}'.format(chainer.__version__))
    print('cuda: {}, cudnn: {}, nccl: {}'.format(chainer.cuda.available,
                                                 chainer.cuda.cudnn_enabled,
                                                 HAVE_NCCL))

    # Create result_dir
    if args.result_dir is not None:
        config['result_dir'] = args.result_dir
    else:
        config['result_dir'] = create_result_dir_from_config_path(args.config)
    log_fn = save_config_get_log_fn(config['result_dir'], args.config)
    print('result_dir:', config['result_dir'])

    # Instantiate model
    model = get_model_from_config(config)
    print('model:', model.__class__.__name__)

    # Initialize optimizer
    optimizer = get_optimizer_from_config(model, config)
    print('optimizer:', optimizer.__class__.__name__)

    # Setting up datasets
    train_dataset, valid_dataset = get_dataset_from_config(config)
    print('train_dataset: {}'.format(len(train_dataset)),
          train_dataset.__class__.__name__)
    print('valid_dataset: {}'.format(len(valid_dataset)),
          valid_dataset.__class__.__name__)

    # Prepare devices: the first GPU is 'main', the rest are 'gpu<id>'
    devices = {'main': args.gpus[0]}
    for gid in args.gpus[1:]:
        devices['gpu{}'.format(gid)] = gid

    # Create iterators
    train_iter, valid_iter = create_iterators(
        train_dataset, config['dataset']['train']['batchsize'],
        valid_dataset, config['dataset']['valid']['batchsize'],
        devices)
    print('train_iter:', train_iter.__class__.__name__)
    print('valid_iter:', valid_iter.__class__.__name__)

    # Create updater
    updater_creator = get_updater_creator_from_config(config)
    updater = updater_creator(train_iter, optimizer, devices)
    print('updater:', updater.__class__.__name__)

    # Create trainer
    trainer = training.Trainer(updater, config['stop_trigger'],
                               out=config['result_dir'])
    print('Trainer stops:', config['stop_trigger'])

    # Trainer extensions: each list entry is a single-item mapping
    # {extension_name: options}. popitem() consumes that entry in place.
    for ext in config['trainer_extension']:
        ext, values = ext.popitem()
        if ext == 'LogReport':
            trigger = values['trigger']
            trainer.extend(
                extensions.LogReport(trigger=trigger, log_name=log_fn))
        elif ext == 'observe_lr':
            trainer.extend(extensions.observe_lr(), trigger=values['trigger'])
        elif ext == 'dump_graph':
            trainer.extend(extensions.dump_graph(**values))
        elif ext == 'Evaluator':
            evaluator_creator = get_evaluator_creator_from_config(values)
            evaluator = evaluator_creator(valid_iter, model, devices)
            trainer.extend(evaluator, trigger=values['trigger'],
                           name=values['prefix'])
        elif ext == 'PlotReport':
            trainer.extend(extensions.PlotReport(**values))
        elif ext == 'PrintReport':
            trigger = values.pop('trigger')
            trainer.extend(extensions.PrintReport(**values), trigger=trigger)
        elif ext == 'ProgressBar':
            upd_int = values['update_interval']
            trigger = values['trigger']
            trainer.extend(extensions.ProgressBar(update_interval=upd_int),
                           trigger=trigger)
        elif ext == 'snapshot':
            filename = values['filename']
            trigger = values['trigger']
            trainer.extend(extensions.snapshot(filename=filename),
                           trigger=trigger)
        elif ext == 'ParameterStatistics':
            # 'links' holds dotted attribute paths relative to
            # model.predictor; resolve each one to the actual link object.
            links = []
            for link_name in values.pop('links'):
                lns = [ln.strip() for ln in link_name.split('.')
                       if ln.strip()]
                target = model.predictor
                for ln in lns:
                    target = getattr(target, ln)
                links.append(target)
            trainer.extend(extensions.ParameterStatistics(links, **values))
        elif ext == 'custom':
            custom_extension = get_custum_extension_from_config(values)
            trainer.extend(custom_extension, trigger=values['trigger'])

    # LR decay: multiply the main optimizer's lr by `ratio` at each
    # scheduled point.
    if 'lr_drop_ratio' in config['optimizer'] \
            and 'lr_drop_triggers' in config['optimizer']:
        ratio = config['optimizer']['lr_drop_ratio']
        points = config['optimizer']['lr_drop_triggers']['points']
        unit = config['optimizer']['lr_drop_triggers']['unit']
        drop_trigger = triggers.ManualScheduleTrigger(points, unit)

        def lr_drop(trainer):
            trainer.updater.get_optimizer('main').lr *= ratio

        trainer.extend(lr_drop, trigger=drop_trigger)

    # Resume: keep a '.bak' copy of the snapshot before loading it
    if args.resume is not None:
        fn = '{}.bak'.format(args.resume)
        shutil.copy(args.resume, fn)
        serializers.load_npz(args.resume, trainer)
        print('Resumed from:', args.resume)

    print('==========================================')

    trainer.run()
    return 0
def run(dataset, word2vec, epoch, frequency, gpu, out, model, batchsize, lr,
        fix_embedding, resume):
    """
    Train multi-domain user review classification using Blitzer et al.'s
    dataset (https://www.cs.jhu.edu/~mdredze/datasets/sentiment/)

    Please refer README.md for details.
    """
    # `model` arrives as the architecture name ('rnn' or 'cnn') and is rebound
    # to the predictor object below. `frequency` is passed straight through as
    # a trainer trigger. A negative `gpu` means CPU.
    memory = Memory(cachedir=out, verbose=1)
    w2v, vocab, train_dataset, dev_dataset, _, label_dict, domain_dict = \
        memory.cache(prepare_blitzer_data)(dataset, word2vec)

    if model == 'rnn':
        model = multidomain_sentiment.models.create_rnn_predictor(
            len(domain_dict), w2v.shape[0], w2v.shape[1], 300,
            len(label_dict), 2, 300, dropout_rnn=0.1, initialEmb=w2v,
            dropout_emb=0.1, fix_embedding=fix_embedding)
    elif model == 'cnn':
        model = multidomain_sentiment.models.create_cnn_predictor(
            len(domain_dict), w2v.shape[0], w2v.shape[1], 300,
            len(label_dict), 300, dropout_fc=0.1, initialEmb=w2v,
            dropout_emb=0.1, fix_embedding=fix_embedding)
    else:
        # An assert would be stripped under `python -O`, letting the plain
        # string reach the classifier; fail explicitly instead.
        raise ValueError(
            "unknown model {!r}: expected 'rnn' or 'cnn'".format(model))

    classifier = multidomain_sentiment.models.MultiDomainClassifier(
        model, domain_dict=domain_dict)

    if gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(gpu).use()
        classifier.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam(alpha=lr)
    optimizer.setup(classifier)

    train_iter = chainer.iterators.SerialIterator(train_dataset, batchsize)

    # Set up a trainer
    updater = training.StandardUpdater(
        train_iter, optimizer, device=gpu,
        converter=multidomain_sentiment.training.convert)

    if dev_dataset is not None:
        stop_trigger = EarlyStoppingTrigger(monitor='validation/main/loss',
                                            max_trigger=(epoch, 'epoch'))
        trainer = training.Trainer(updater, stop_trigger, out=out)
        logger.info("train: {}, dev: {}".format(len(train_dataset),
                                                len(dev_dataset)))

        # Evaluate the model with the development dataset at `frequency`
        dev_iter = chainer.iterators.SerialIterator(dev_dataset, batchsize,
                                                    repeat=False,
                                                    shuffle=False)
        evaluator = extensions.Evaluator(
            dev_iter, classifier, device=gpu,
            converter=multidomain_sentiment.training.convert)
        trainer.extend(evaluator, trigger=frequency)

        # This works together with EarlyStoppingTrigger to provide more
        # reliable early stopping
        trainer.extend(SaveRestore(),
                       trigger=chainer.training.triggers.MinValueTrigger(
                           'validation/main/loss'))
    else:
        trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)
        logger.info("train: {}".format(len(train_dataset)))

        # Take a snapshot at `frequency`. (SaveRestore will save the snapshot
        # when dev_dataset is available.)
        trainer.extend(extensions.snapshot(), trigger=frequency)

    logger.info("With labels: %s" % json.dumps(label_dict))

    if gpu < 0:
        # ParameterStatistics does not work with GPU as of chainer 2.x
        # https://github.com/chainer/chainer/issues/3027
        trainer.extend(
            extensions.ParameterStatistics(model, trigger=(100, 'iteration')),
            priority=99)

    # Write a log of evaluation statistics for each iteration
    trainer.extend(extensions.LogReport(trigger=(1, 'iteration')),
                   priority=98)
    trainer.extend(extensions.PrintReport([
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy'
    ]), trigger=frequency, priority=97)

    if resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(resume, trainer)

    logger.info("Started training")
    trainer.run()

    # Save final model (without trainer)
    chainer.serializers.save_npz(os.path.join(out, 'trained_model'), model)
    with open(os.path.join(out, 'vocab.json'), 'w') as fout:
        json.dump(vocab, fout)
trainer.extend(extensions.dump_graph('main/loss')) trainer.extend(extensions.Evaluator(valid_iter, model, converter=util.converter, device=device), name='val') trainer.extend( extensions.PrintReport([ 'epoch', 'iteration', 'main/loss', 'val/main/loss', 'elapsed_time' ])) trainer.extend( extensions.PlotReport(['main/loss', 'val/main/loss'], x_key='epoch', file_name='loss.png')) trainer.extend( extensions.ParameterStatistics(model.predictor.W, {'mean': xp.mean}, report_grads=True)) trainer.run() gen_model = util.Generator(predictor=predictor, device=device, max_size=30) with chainer.using_config('train', False), chainer.using_config( 'enable_backprop', False): ys_list = gen_model(test) for ys in ys_list: for y in ys: y = int(y) if y is vocab['<eos>']: print('\n') break print(rvocab[y], end='') if device >= 0:
def get_trainer(args):
    """Build (but do not run) a Chainer trainer from a YAML config file.

    Args:
        args: Parsed command-line namespace. The attributes read here are
            ``config`` (path to the YAML file), ``gpus`` (sequence of device
            ids, the first one becomes the ``'main'`` device),
            ``result_dir`` (output directory or ``None``) and ``resume``
            (snapshot path or ``None``).

    Returns:
        The configured ``training.Trainer`` instance.
    """
    # Use a context manager so the config file handle is closed right after
    # parsing instead of being leaked.
    # NOTE(review): yaml.load without an explicit Loader is unsafe on
    # untrusted input (and newer PyYAML releases require a Loader argument);
    # consider yaml.safe_load if the configs only use plain YAML types.
    with open(args.config) as config_file:
        config = yaml.load(config_file)

    # Set workspace size
    if 'max_workspace_size' in config:
        chainer.cuda.set_max_workspace_size(config['max_workspace_size'])

    # Show the setup information
    print('==========================================')
    print('Chainer version: {}'.format(chainer.__version__))
    print('CuPy version: {}'.format(chainer.cuda.cupy.__version__))
    # The previous format string carried an 'nccl: {}' placeholder with no
    # matching argument, which raises IndexError on every call; report only
    # the two values that are actually supplied.
    print('cuda: {}, cudnn: {}'.format(
        chainer.cuda.available,
        chainer.cuda.cudnn_enabled,
    ))

    # Prepare devices: the first GPU is 'main', the rest are 'gpu<id>'
    print('Devices:')
    devices = {'main': args.gpus[0]}
    print('\tmain:', args.gpus[0])
    for gid in args.gpus[1:]:
        devices['gpu{}'.format(gid)] = gid
        print('\tgpu{}'.format(gid), gid)

    # Create result_dir
    if args.result_dir is not None:
        config['result_dir'] = args.result_dir
        # Import the model module by its bare name from the given result
        # directory, which is put at the front of sys.path.
        model_fn = config['model']['module'].split('.')[-1]
        sys.path.insert(0, args.result_dir)
        config['model']['module'] = model_fn
    else:
        config['result_dir'] = create_result_dir_from_config_path(args.config)
    log_fn = save_config_get_log_fn(config['result_dir'], args.config)
    print('result_dir:', config['result_dir'])

    # Instantiate model
    model = get_model_from_config(config)
    print('model:', model.__class__.__name__)

    # Initialize optimizer
    optimizer = get_optimizer_from_config(model, config)
    print('optimizer:', optimizer.__class__.__name__)

    # Setting up datasets
    train_dataset, valid_dataset = get_dataset_from_config(config)
    print('train_dataset: {}'.format(len(train_dataset)),
          train_dataset.__class__.__name__)
    print('valid_dataset: {}'.format(len(valid_dataset)),
          valid_dataset.__class__.__name__)

    # Create iterators
    train_iter, valid_iter = create_iterators(train_dataset, valid_dataset,
                                              config)
    print('train_iter:', train_iter.__class__.__name__)
    print('valid_iter:', valid_iter.__class__.__name__)

    # Create updater and trainer
    if 'updater_creator' in config:
        updater_creator = get_updater_creator_from_config(config)
        updater = updater_creator(train_iter, optimizer, devices)
    else:
        updater = create_updater(train_iter, optimizer, devices)
    print('updater:', updater.__class__.__name__)

    # Create Trainer
    trainer = training.Trainer(updater, config['stop_trigger'],
                               out=config['result_dir'])
    print('Trainer stops:', config['stop_trigger'])

    # Trainer extensions: each list entry is a single-item mapping
    # {extension_name: options}. popitem() consumes that entry in place.
    for ext in config['trainer_extension']:
        ext, values = ext.popitem()
        if ext == 'LogReport':
            trigger = values['trigger']
            trainer.extend(
                extensions.LogReport(trigger=trigger, log_name=log_fn))
        elif ext == 'observe_lr':
            trainer.extend(extensions.observe_lr(), trigger=values['trigger'])
        elif ext == 'dump_graph':
            trainer.extend(extensions.dump_graph(**values))
        elif ext == 'Evaluator':
            assert 'module' in values
            mod = import_module(values['module'])
            evaluator = getattr(mod, values['name'])
            # The stock Evaluator gets the whole model and a device; any
            # other evaluator class is given only the predictor.
            if evaluator is extensions.Evaluator:
                evaluator = evaluator(valid_iter, model, device=args.gpus[0])
            else:
                evaluator = evaluator(valid_iter, model.predictor)
            trainer.extend(evaluator, trigger=values['trigger'],
                           name=values['prefix'])
        elif ext == 'PlotReport':
            trainer.extend(extensions.PlotReport(**values))
        elif ext == 'PrintReport':
            trigger = values.pop('trigger')
            trainer.extend(extensions.PrintReport(**values), trigger=trigger)
        elif ext == 'ProgressBar':
            upd_int = values['update_interval']
            trigger = values['trigger']
            trainer.extend(extensions.ProgressBar(update_interval=upd_int),
                           trigger=trigger)
        elif ext == 'snapshot':
            filename = values['filename']
            trigger = values['trigger']
            trainer.extend(extensions.snapshot(filename=filename),
                           trigger=trigger)
        elif ext == 'ParameterStatistics':
            # 'links' holds dotted attribute paths relative to
            # model.predictor; resolve each one to the actual link object.
            links = []
            for link_name in values.pop('links'):
                lns = [ln.strip() for ln in link_name.split('.')
                       if ln.strip()]
                target = model.predictor
                for ln in lns:
                    target = getattr(target, ln)
                links.append(target)
            trainer.extend(extensions.ParameterStatistics(links, **values))
        elif ext == 'custom':
            custom_extension = get_custum_extension_from_config(values)
            trainer.extend(custom_extension)

    # LR decay: multiply the main optimizer's lr by `ratio` at each
    # scheduled point.
    if 'lr_drop_ratio' in config['optimizer'] \
            and 'lr_drop_triggers' in config['optimizer']:
        ratio = config['optimizer']['lr_drop_ratio']
        points = config['optimizer']['lr_drop_triggers']['points']
        unit = config['optimizer']['lr_drop_triggers']['unit']
        drop_trigger = triggers.ManualScheduleTrigger(points, unit)

        def lr_drop(trainer):
            trainer.updater.get_optimizer('main').lr *= ratio

        trainer.extend(lr_drop, trigger=drop_trigger)

    # Polynomial LR decay, applied on every iteration
    if 'lr_drop_poly_power' in config['optimizer']:
        power = config['optimizer']['lr_drop_poly_power']
        stop_trigger = config['stop_trigger']
        batchsize = train_iter.batch_size
        len_dataset = len(train_dataset)
        trainer.extend(PolynomialShift('lr', power, stop_trigger, batchsize,
                                       len_dataset),
                       trigger=(1, 'iteration'))

    # Resume
    if args.resume is not None:
        serializers.load_npz(args.resume, trainer)
        print('Resumed from:', args.resume)

    print('==========================================')

    return trainer
def main():
    """Set up and run paired image-translation (cGAN-style) training.

    Reads all settings from ``arguments()``, builds the datasets, the
    encoder/decoder/discriminator models and their optimisers, registers the
    trainer extensions (snapshots, logging, plots, learning-rate scheduling,
    visual evaluation) and finally calls ``trainer.run()``. Results are
    written under a timestamped directory inside ``args.out``.
    """
    args = arguments()
    outdir = os.path.join(args.out, dt.now().strftime('%m%d_%H%M') + "_cgan")

    # chainer.config.type_check = False
    chainer.config.autotune = True
    chainer.config.dtype = dtypes[args.dtype]
    chainer.print_runtime_info()
    #print('Chainer version: ', chainer.__version__)
    #print('GPU availability:', chainer.cuda.available)
    #print('cuDNN availability:', chainer.cuda.cudnn_enabled)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()

    ## dataset preparation
    train_d = Dataset(args.train, args.root, args.from_col, args.to_col,
                      clipA=args.clipA, clipB=args.clipB,
                      class_num=args.class_num,
                      crop=(args.crop_height, args.crop_width),
                      imgtype=args.imgtype, random=args.random_translate,
                      grey=args.grey, BtoA=args.btoa)
    test_d = Dataset(args.val, args.root, args.from_col, args.to_col,
                     clipA=args.clipA, clipB=args.clipB,
                     class_num=args.class_num,
                     crop=(args.crop_height, args.crop_width),
                     imgtype=args.imgtype, random=args.random_translate,
                     grey=args.grey, BtoA=args.btoa)
    # The dataset reports the crop size it settled on; write it back to args
    args.crop_height, args.crop_width = train_d.crop
    if (len(train_d) == 0):
        print("No images found!")
        exit()

    # setup training/validation data iterators
    train_iter = chainer.iterators.SerialIterator(train_d, args.batch_size)
    test_iter = chainer.iterators.SerialIterator(test_d, args.nvis,
                                                 shuffle=False)
    ## same as training data; used for validation
    test_iter_gt = chainer.iterators.SerialIterator(
        train_d, args.nvis, shuffle=False)

    # Channel counts are taken from the first training pair
    args.ch = len(train_d[0][0])
    args.out_ch = len(train_d[0][1])
    print("Input channels {}, Output channels {}".format(args.ch,
                                                         args.out_ch))
    if (len(train_d) * len(test_d) == 0):
        print("No images found!")
        exit()

    ## Set up models
    # shared pretrained layer
    if (args.gen_pretrained_encoder and args.gen_pretrained_lr_ratio == 0):
        if "resnet" in args.gen_pretrained_encoder:
            pretrained = L.ResNet50Layers()
            print("Pretrained ResNet model loaded.")
        else:
            pretrained = L.VGG16Layers()
            print("Pretrained VGG model loaded.")
        if args.gpu >= 0:
            pretrained.to_gpu()
        enc_x = net.Encoder(args, pretrained)
    else:
        enc_x = net.Encoder(args)

    # gen = net.Generator(args)
    dec_y = net.Decoder(args)

    if args.lambda_dis > 0:
        dis = net.Discriminator(args)
        models = {'enc_x': enc_x, 'dec_y': dec_y, 'dis': dis}
    else:
        # Placeholder link so the updater/optimiser plumbing below still has
        # a `dis` object; it is not added to `models`.
        dis = L.Linear(1, 1)
        models = {'enc_x': enc_x, 'dec_y': dec_y}

    ## load learnt models
    # Optimiser state files are keyed by optimiser name. The previous
    # positional list was zipped against the optimiser dict, so supplying
    # only --model_dis paired the discriminator's file with 'enc_x'.
    optimiser_files = {}
    if args.model_gen:
        serializers.load_npz(args.model_gen, enc_x)
        serializers.load_npz(args.model_gen.replace('enc_x', 'dec_y'), dec_y)
        print('model loaded: {}, {}'.format(
            args.model_gen, args.model_gen.replace('enc_x', 'dec_y')))
        optimiser_files['enc_x'] = args.model_gen.replace('enc_x',
                                                          'opt_enc_x')
        optimiser_files['dec_y'] = args.model_gen.replace('enc_x',
                                                          'opt_dec_y')
    if args.model_dis:
        serializers.load_npz(args.model_dis, dis)
        print('model loaded: {}'.format(args.model_dis))
        optimiser_files['dis'] = args.model_dis.replace('dis', 'opt_dis')

    ## send models to GPU
    if args.gpu >= 0:
        enc_x.to_gpu()
        dec_y.to_gpu()
        dis.to_gpu()

    # Setup optimisers
    def make_optimizer(model, lr, opttype='Adam', pretrained_lr_ratio=1.0):
        # eps = 1e-5 if args.dtype==np.float16 else 1e-8
        optimizer = optim[opttype](lr)
        optimizer.setup(model)
        if args.weight_decay > 0:
            if opttype in ['Adam', 'AdaBound', 'Eve']:
                optimizer.weight_decay_rate = args.weight_decay
            else:
                if args.weight_decay_norm == 'l2':
                    optimizer.add_hook(
                        chainer.optimizer.WeightDecay(args.weight_decay))
                else:
                    optimizer.add_hook(
                        chainer.optimizer_hooks.Lasso(args.weight_decay))
        return optimizer

    opt_enc_x = make_optimizer(enc_x, args.learning_rate_gen, args.optimizer)
    opt_dec_y = make_optimizer(dec_y, args.learning_rate_gen, args.optimizer)
    opt_dis = make_optimizer(dis, args.learning_rate_dis, args.optimizer)
    optimizers = {'enc_x': opt_enc_x, 'dec_y': opt_dec_y, 'dis': opt_dis}

    ## resume optimisers from file
    if args.load_optimizer:
        for e, m in optimiser_files.items():
            if m:
                # Best effort: a missing or incompatible state file is
                # reported and skipped. `except Exception` (not a bare
                # except) so Ctrl-C / SystemExit are not swallowed.
                try:
                    serializers.load_npz(m, optimizers[e])
                    print('optimiser loaded: {}'.format(m))
                except Exception:
                    print("couldn't load {}".format(m))

    # finetuning
    if args.gen_pretrained_encoder:
        if args.gen_pretrained_lr_ratio == 0:
            enc_x.base.disable_update()
        else:
            # NOTE(review): this branch reaches the pretrained layers via
            # enc_x.encoder.base while the branch above uses enc_x.base -
            # confirm which attribute path is correct.
            for func_name in enc_x.encoder.base._children:
                for param in enc_x.encoder.base[func_name].params():
                    param.update_rule.hyperparam.eta *= \
                        args.gen_pretrained_lr_ratio

    # Set up trainer
    updater = Updater(
        models=(enc_x, dec_y, dis),
        iterator={'main': train_iter},
        optimizer=optimizers,
        # converter=convert.ConcatWithAsyncTransfer(),
        params={'args': args},
        device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)

    ## save learnt results at a specified interval or at the end of training
    if args.snapinterval < 0:
        args.snapinterval = args.epoch
    snapshot_interval = (args.snapinterval, 'epoch')
    display_interval = (args.display_interval, 'iteration')

    for e in models:
        trainer.extend(
            extensions.snapshot_object(models[e],
                                       e + '{.updater.epoch}.npz'),
            trigger=snapshot_interval)
        if args.parameter_statistics:
            ## very slow
            trainer.extend(extensions.ParameterStatistics(models[e]))
    for e in optimizers:
        trainer.extend(
            extensions.snapshot_object(optimizers[e],
                                       'opt_' + e + '{.updater.epoch}.npz'),
            trigger=snapshot_interval)

    ## plot NN graph
    if args.lambda_rec_l1 > 0:
        trainer.extend(
            extensions.dump_graph('dec_y/loss_L1', out_name='enc.dot'))
    elif args.lambda_rec_l2 > 0:
        trainer.extend(
            extensions.dump_graph('dec_y/loss_L2', out_name='gen.dot'))
    elif args.lambda_rec_ce > 0:
        trainer.extend(
            extensions.dump_graph('dec_y/loss_CE', out_name='gen.dot'))
    if args.lambda_dis > 0:
        trainer.extend(
            extensions.dump_graph('dis/loss_real', out_name='dis.dot'))

    ## log outputs: only list the loss terms that are switched on
    log_keys = ['epoch', 'iteration', 'lr']
    log_keys_gen = ['myval/loss_L1', 'myval/loss_L2']
    log_keys_dis = []
    if args.lambda_rec_l1 > 0:
        log_keys_gen.append('dec_y/loss_L1')
    if args.lambda_rec_l2 > 0:
        log_keys_gen.append('dec_y/loss_L2')
    if args.lambda_rec_ce > 0:
        log_keys_gen.extend(['dec_y/loss_CE', 'myval/loss_CE'])
    if args.lambda_reg > 0:
        log_keys.extend(['enc_x/loss_reg'])
    if args.lambda_tv > 0:
        log_keys_gen.append('dec_y/loss_tv')
    if args.lambda_dis > 0:
        log_keys_dis.extend(
            ['dec_y/loss_dis', 'dis/loss_real', 'dis/loss_fake'])
    if args.lambda_mispair > 0:
        log_keys_dis.append('dis/loss_mispair')
    if args.dis_wgan:
        log_keys_dis.extend(['dis/loss_gp'])
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(
        extensions.PrintReport(log_keys + log_keys_gen + log_keys_dis),
        trigger=display_interval)
    if extensions.PlotReport.available():
        # trainer.extend(extensions.PlotReport(['lr'], 'iteration',trigger=display_interval, file_name='lr.png'))
        trainer.extend(
            extensions.PlotReport(log_keys_gen, 'iteration',
                                  trigger=display_interval,
                                  file_name='loss_gen.png',
                                  postprocess=plot_log))
        trainer.extend(
            extensions.PlotReport(log_keys_dis, 'iteration',
                                  trigger=display_interval,
                                  file_name='loss_dis.png'))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # learning rate scheduling
    trainer.extend(extensions.observe_lr(optimizer_name='enc_x'),
                   trigger=display_interval)
    if args.optimizer in ['Adam', 'AdaBound', 'Eve']:
        lr_target = 'eta'
    else:
        lr_target = 'lr'
    if args.lr_drop > 0:  ## cosine annealing
        for e in [opt_enc_x, opt_dec_y, opt_dis]:
            trainer.extend(
                CosineShift(lr_target, args.epoch // args.lr_drop,
                            optimizer=e),
                trigger=(1, 'epoch'))
    else:
        # NOTE(review): this branch only runs when args.lr_drop <= 0, so
        # `args.epoch // args.lr_drop` raises ZeroDivisionError for 0 and
        # yields a negative interval otherwise. It also shifts 'lr' rather
        # than `lr_target`. The intended schedule is not evident from this
        # code, so behaviour is left unchanged - please confirm.
        for e in [opt_enc_x, opt_dec_y, opt_dis]:
            #trainer.extend(extensions.LinearShift('eta', (1.0,0.0), (decay_start_iter,decay_end_iter), optimizer=e))
            trainer.extend(
                extensions.ExponentialShift('lr', 0.33, optimizer=e),
                trigger=(args.epoch // args.lr_drop, 'epoch'))

    # evaluation
    vis_folder = os.path.join(outdir, "vis")
    os.makedirs(vis_folder, exist_ok=True)
    if not args.vis_freq:
        args.vis_freq = max(len(train_d) // 2, 50)
    trainer.extend(VisEvaluator({
        "test": test_iter,
        "train": test_iter_gt
    }, {
        "enc_x": enc_x,
        "dec_y": dec_y
    }, params={
        'vis_out': vis_folder,
        'args': args
    }, device=args.gpu), trigger=(args.vis_freq, 'iteration'))

    # ChainerUI: removed until ChainerUI updates to be compatible with Chainer 6.0
    trainer.extend(CommandsExtension())

    # Run the training
    print("\nresults are saved under: ", outdir)
    save_args(args, outdir)
    trainer.run()
def set_event_handler(self):
    """Attach the evaluation, logging, scheduling and snapshot extensions.

    Calls ``self.set_target()`` first, then registers the extensions on
    ``self.trainer`` in a fixed order, and finally delegates to
    ``self.set_additonal_event_handler()``.
    """
    self.set_target()

    trainer = self.trainer
    run_first = self.call_before_training
    eval_trigger = (self.eval_interval, 'epoch')
    log_trigger = (self.log_interval, 'epoch')

    # Validation pass; an Evaluator for the training split is not
    # implemented.
    validator = extensions.Evaluator(
        self.valid_loader,
        self.target,
        converter=self.converter,
        device=self.device,
    )
    trainer.extend(validator, trigger=eval_trigger,
                   call_before_training=run_first)

    trainer.extend(extensions.ProgressBar())
    trainer.extend(extensions.observe_lr())
    # self.trainer.extend(extensions.MicroAverage('loss', 'lr', 'mav'))
    trainer.extend(extensions.LogReport(trigger=log_trigger),
                   call_before_training=run_first)
    trainer.extend(extensions.FailOnNonNumber())

    # Learning-rate schedule: exponential decay starting from 10x self.lr.
    # Not implemented here: InverseShift, LinearShift, MultistepShift,
    # PolynomialShift, StepShift, WarmupShift.
    # self.trainer.extend(extensions.ExponentialShift('lr', rate=0.9))
    lr_decay = extensions.ExponentialShift('lr', rate=0.99,
                                           init=self.lr * 10.0)
    trainer.extend(lr_decay)

    trainer.extend(
        extensions.ParameterStatistics(self.model, trigger=eval_trigger))
    trainer.extend(extensions.VariableStatisticsPlot(self.model))

    report_columns = [
        'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
        'validation/main/accuracy', 'elapsed_time'
    ]
    trainer.extend(extensions.PrintReport(report_columns),
                   call_before_training=run_first)

    # One train-vs-validation curve per metric, plotted against the epoch.
    plots = (
        (['main/loss', 'validation/main/loss'], 'loss.png'),
        (['main/accuracy', 'validation/main/accuracy'], 'accuracy.png'),
    )
    for y_keys, png_name in plots:
        trainer.extend(
            extensions.PlotReport(y_keys, 'epoch', file_name=png_name),
            call_before_training=run_first)

    trainer.extend(extensions.snapshot(n_retains=self.retain_num),
                   trigger=log_trigger)

    self.set_additonal_event_handler()
''' from chainer import optimizers from chainer import training # ネットワークを作成 predictor = MLP() # L.Classifier でラップし、損失の計算などをモデルに含める net = L.Classifier(predictor) # 最適化手法を選択してオプティマイザを作成し、最適化対象のネットワークを持たせる optimizer = optimizers.MomentumSGD(lr=0.1).setup(net) # アップデータにイテレータとオプティマイザを渡す updater = training.StandardUpdater(train_iter, optimizer, device=-1) # device=-1でCPUでの計算実行を指定 trainer = training.Trainer(updater, (30, 'epoch'), out='results/iris_result1') from chainer.training import extensions trainer.extend(extensions.LogReport(trigger=(1, 'epoch'), log_name='log')) trainer.extend(extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}')) trainer.extend(extensions.dump_graph('main/loss')) trainer.extend(extensions.Evaluator(valid_iter, net, device=-1), name='val') trainer.extend(extensions.PrintReport(['epoch', 'iteration', 'main/loss', 'main/accuracy', 'val/main/loss', 'val/main/accuracy', 'fc1/W/data/mean', 'elapsed_time'])) trainer.extend(extensions.PlotReport(['fc1/W/grad/mean'], x_key='epoch', file_name='mean.png')) trainer.extend(extensions.PlotReport(['main/loss', 'val/main/loss'], x_key='epoch', file_name='loss.png')) trainer.extend(extensions.PlotReport(['main/accuracy', 'val/main/accuracy'], x_key='epoch', file_name='accuracy.png')) trainer.extend(extensions.ParameterStatistics(net.predictor.fc1, {'mean': np.mean}, report_grads=True)) trainer.run()
# Wrap the existing model in TripletLossClassifier with F.triplet as the loss
model = TripletLossClassifier(model, lossfun=F.triplet)
# Plain SGD; setup() binds the optimizer to the wrapped model
optimizer = optimizers.SGD(lr=0.01).setup(model)
updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
# Train for max_epoch epochs, writing outputs under 'mnist_result'
trainer = training.Trainer(updater, (max_epoch, 'epoch'), out='mnist_result')

# Extensions for trainer
trainer.extend(extensions.LogReport())
# Validation results are reported under the 'val' prefix
trainer.extend(extensions.Evaluator(valid_iter, model, device=gpu_id),
               name='val')
trainer.extend(
    extensions.PrintReport([
        'epoch', 'main/loss', 'main/accuracy', 'val/main/loss',
        'val/main/accuracy', 'l1/W/data/std', 'elapsed_time'
    ]))
# Report np.std statistics for the l1 link of the predictor (presumably the
# source of the 'l1/W/data/std' column printed above)
trainer.extend(
    extensions.ParameterStatistics(model.predictor.l1, {'std': np.std}))
trainer.extend(
    extensions.PlotReport(['main/loss', 'val/main/loss'], x_key='epoch',
                          file_name='loss.png'))
trainer.extend(
    extensions.PlotReport(['main/accuracy', 'val/main/accuracy'],
                          x_key='epoch', file_name='accuracy.png'))

# Run
trainer.run()

# Evaluate w/ test data
test_evaluator = extensions.Evaluator(test_iter, model, device=gpu_id)
results = test_evaluator()
def main():
    """Parse CLI options, build the GAN fine-tuning trainer and run it.

    Loads generator, discriminator and encoder weights from the given
    ``.npz`` files, moves them to the selected GPU, trains only the layers
    named in ``layers_to_train`` of the generator, and registers snapshot,
    logging and sampling extensions before calling ``trainer.run()``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_path', type=str,
                        default='configs/base.yml',
                        help='path to config file')
    parser.add_argument('--gpu', type=int, default=0,
                        help='index of gpu to be used')
    parser.add_argument('--input_dir', type=str, default='./data/imagenet')
    parser.add_argument('--truth_dir', type=str, default='./data/imagenet')
    parser.add_argument('--results_dir', type=str, default='./results/gans',
                        help='directory to save the results to')
    parser.add_argument('--snapshot', type=str, default='',
                        help='path to the snapshot file to use')
    # Help text previously said "generator" (copy-paste from --gen_model)
    parser.add_argument('--enc_model', type=str, default='',
                        help='path to the encoder .npz file')
    parser.add_argument('--gen_model', type=str, default='',
                        help='path to the generator .npz file')
    parser.add_argument('--dis_model', type=str, default='',
                        help='path to the discriminator .npz file')
    parser.add_argument('--loaderjob', type=int,
                        help='number of parallel data loading processes')
    args = parser.parse_args()

    # Use a context manager so the config file handle is closed right after
    # parsing instead of being leaked.
    # NOTE(review): yaml.load without an explicit Loader is unsafe on
    # untrusted input (and newer PyYAML releases require a Loader argument);
    # consider yaml.safe_load if the configs only use plain YAML types.
    with open(args.config_path) as config_file:
        config = yaml_utils.Config(yaml.load(config_file))

    chainer.cuda.get_device_from_id(args.gpu).use()
    gen, dis, enc = load_models(config)
    # The generator is loaded non-strictly; dis and enc must match exactly
    chainer.serializers.load_npz(args.gen_model, gen, strict=False)
    chainer.serializers.load_npz(args.dis_model, dis)
    chainer.serializers.load_npz(args.enc_model, enc)
    gen.to_gpu(device=args.gpu)
    dis.to_gpu(device=args.gpu)
    enc.to_gpu(device=args.gpu)
    models = {"gen": gen, "dis": dis, "enc": enc}

    # Only the generator gets an optimizer
    opt_gen = make_optimizer(gen, alpha=config.adam['alpha'],
                             beta1=config.adam['beta1'],
                             beta2=config.adam['beta2'])
    opt_gen.add_hook(chainer.optimizer.WeightDecay(config.weight_decay))
    opt_gen.add_hook(chainer.optimizer.GradientClipping(config.grad_clip))

    # disable update of pre-trained weights
    layers_to_train = ['lA1', 'lA2', 'lB1', 'lB2', 'preluW', 'preluMiddleW']
    for layer in gen.children():
        if layer.name not in layers_to_train:
            layer.disable_update()

    lmd_pixel = 0.05

    def fast_loss(out, gt):
        """Return reconstruction loss plus the weighted pixel loss."""
        l1 = reconstruction_loss(dis, out, gt)
        l2 = lmd_pixel * pixel_loss(out, gt)
        loss = l1 + l2
        return loss

    gen.set_fast_loss(fast_loss)

    opts = {"opt_gen": opt_gen}

    # Dataset: the CLI directories override the roots in the config
    config['dataset']['args']['root_input'] = args.input_dir
    config['dataset']['args']['root_truth'] = args.truth_dir
    dataset = yaml_utils.load_dataset(config)

    # Iterator
    iterator = chainer.iterators.MultiprocessIterator(
        dataset, config.batchsize, n_processes=args.loaderjob)
    kwargs = config.updater['args'] if 'args' in config.updater else {}
    kwargs.update({
        'models': models,
        'iterator': iterator,
        'optimizer': opts,
    })
    updater = yaml_utils.load_updater_class(config)
    updater = updater(**kwargs)

    out = args.results_dir
    create_result_dir(out, args.config_path, config)
    trainer = training.Trainer(updater, (config.iteration, 'iteration'),
                               out=out)
    report_keys = [
        "loss_noab", "loss1", "loss2", "loss3", "fast_alpha", "loss_ae",
        "fast_benefit", "min_slope", "max_slope", "min_slope_middle",
        "max_slope_middle"
    ]

    # Set up logging
    trainer.extend(extensions.snapshot(),
                   trigger=(config.snapshot_interval, 'iteration'))
    for m in models.values():
        trainer.extend(
            extensions.snapshot_object(
                m, m.__class__.__name__ + '_{.updater.iteration}.npz'),
            trigger=(config.snapshot_interval, 'iteration'))
    trainer.extend(
        extensions.LogReport(keys=report_keys,
                             trigger=(config.display_interval, 'iteration')))
    trainer.extend(extensions.ParameterStatistics(gen),
                   trigger=(config.display_interval, 'iteration'))
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(config.display_interval, 'iteration'))
    trainer.extend(
        sample_reconstruction_auxab(enc, gen, out, n_classes=gen.n_classes),
        trigger=(config.evaluation_interval, 'iteration'),
        priority=extension.PRIORITY_WRITER)
    trainer.extend(
        extensions.ProgressBar(update_interval=config.progressbar_interval))

    # Linearly anneal Adam's alpha to 0 between iteration_decay_start and
    # the final iteration
    ext_opt_gen = extensions.LinearShift(
        'alpha', (config.adam['alpha'], 0.),
        (config.iteration_decay_start, config.iteration), opt_gen)
    trainer.extend(ext_opt_gen)

    if args.snapshot:
        print("Resume training with snapshot:{}".format(args.snapshot))
        chainer.serializers.load_npz(args.snapshot, trainer)

    # Run the training
    print("start training")
    trainer.run()
# ネットワークをClassifierで包んで、ロスの計算などをモデルに含める net = L.Classifier(net, lossfun=F.softmax_cross_entropy, accfun=F.accuracy) # 最適化手法の選択 optimizer = optimizers.SGD(lr=0.01).setup(net) # UpdaterにIteratorとOptimizerを渡す updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id) max_epoch = 10 # TrainerにUpdaterを渡す trainer = training.Trainer( updater, (max_epoch, 'epoch'), out='mnist_result') trainer.extend(extensions.LogReport()) trainer.extend(extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}')) trainer.extend(extensions.Evaluator(valid_iter, net, device=gpu_id), name='val') trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'main/accuracy', 'val/main/loss', 'val/main/accuracy', 'l1/W/data/std', 'elapsed_time'])) trainer.extend(extensions.ParameterStatistics(net.predictor.l1, {'std': np.std})) trainer.extend(extensions.PlotReport(['l1/W/data/std'], x_key='epoch', file_name='std.png')) trainer.extend(extensions.PlotReport(['main/loss', 'val/main/loss'], x_key='epoch', file_name='loss.png')) trainer.extend(extensions.PlotReport(['main/accuracy', 'val/main/accuracy'], x_key='epoch', file_name='accuracy.png')) trainer.extend(extensions.dump_graph('main/loss')) trainer.run() chainer.serializers.save_npz('my_mnist.npz', net)
max_epoch = 10

# Train for max_epoch epochs; outputs go to the directory named below
trainer = training.Trainer(updater, (max_epoch, 'epoch'),
                           out='werewolf_result_batchsize14_nofirstday_1hl')

# Register logging, snapshot, validation, reporting and plotting extensions
trainer.extend(extensions.LogReport())
trainer.extend(extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'))
# Validation results are reported under the 'val' prefix
trainer.extend(extensions.Evaluator(valid_iter, network, device=gpu_id),
               name='val')
trainer.extend(
    extensions.PrintReport([
        'epoch', 'main/loss', 'main/accuracy', 'val/main/loss',
        'val/main/accuracy', 'l1/W/data/std', 'elapsed_time'
    ]))
# Report np.std statistics for the l1 link of the predictor (presumably the
# source of the 'l1/W/data/std' key printed and plotted here)
trainer.extend(
    extensions.ParameterStatistics(network.predictor.l1, {'std': np.std}))
trainer.extend(
    extensions.PlotReport(['l1/W/data/std'], x_key='epoch',
                          file_name='std.png'))
trainer.extend(
    extensions.PlotReport(['main/loss', 'val/main/loss'], x_key='epoch',
                          file_name='loss.png'))
trainer.extend(
    extensions.PlotReport(['main/accuracy', 'val/main/accuracy'],
                          x_key='epoch', file_name='accuracy.png'))
trainer.extend(extensions.dump_graph('main/loss'))

trainer.run()
def run(epoch, frequency, gpu, out, word2vec, beer_train, beer_labels,
        beer_test, batchsize, negative_samples, ntopics, lr,
        orthogonality_penalty, fix_embedding, resume):
    """Train an ABAE model and write the model and vocabulary to ``out``.

    Args:
        epoch: Number of epochs to train for (trainer stop trigger).
        frequency: Snapshot interval in iterations. ``-1`` takes a snapshot
            every ``epoch`` epochs instead.
        gpu: GPU device id; a negative value keeps the model on the CPU.
        out: Output directory. Used as the joblib cache directory, the
            trainer output directory, and the destination of
            ``trained_model`` and ``vocab.json``.
        word2vec: Forwarded to the dataset preparation function.
        beer_train: Forwarded to ``prepare_beer_advocate``. When ``None``
            the 20 newsgroups dataset is used instead.
        beer_labels: Forwarded to ``prepare_beer_advocate``. Must be given
            together with ``beer_test`` or not at all.
        beer_test: Forwarded to ``prepare_beer_advocate``. Must be given
            together with ``beer_labels`` or not at all.
        batchsize: Batch size passed to ``NegativeSampleIterator``.
        negative_samples: Passed to ``NegativeSampleIterator``.
        ntopics: Number of topics, passed to dataset preparation and to
            the ``ABAE`` model.
        lr: Value of ``alpha`` for the Adam optimizer.
        orthogonality_penalty: Passed through to the ``ABAE`` model.
        fix_embedding: Passed through to the ``ABAE`` model.
        resume: Path of a trainer snapshot to resume from; a falsy value
            starts training from scratch.

    Raises:
        click.BadParameter: If exactly one of ``beer_labels`` and
            ``beer_test`` is given.
    """
    if (beer_labels is None) != (beer_test is None):
        raise click.BadParameter(
            "Both or neither beer-labels and beer-test can be specified")

    # Dataset preparation is cached on disk under ``out`` via joblib.
    if beer_train is None:
        logger.info('Using 20newsgroup dataset')
        memory = Memory(cachedir=out, verbose=1)
        w2v, vocab, train, test, topic_vectors, label_dict = \
            memory.cache(abae.dataset.prepare_20news)(word2vec, ntopics)
    else:
        logger.info('Using beer adovocate dataset.')
        memory = Memory(cachedir=out, verbose=1, mmap_mode='r')
        w2v, vocab, train, test, topic_vectors, label_dict = \
            memory.cache(abae.dataset.prepare_beer_advocate)(
                beer_train, beer_test, beer_labels, word2vec, ntopics)

    model = abae.model.ABAE(w2v.shape[0], w2v.shape[1], ntopics,
                            fix_embedding=fix_embedding,
                            orthogonality_penalty=orthogonality_penalty)
    model.initialize(w2v, topic_vectors)
    if gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam(alpha=lr)
    optimizer.setup(model)

    train_iter = abae.iterator.NegativeSampleIterator(train, batchsize,
                                                      negative_samples)

    # Set up a trainer
    updater = training.StandardUpdater(
        train_iter, optimizer, device=gpu,
        converter=abae.iterator.concat_examples_ns)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)

    if test is not None:
        logger.info("train: %d, test: %d", len(train), len(test))
        # Evaluate the model with the test dataset every 500 iterations
        test_iter = abae.iterator.NegativeSampleIterator(test, batchsize,
                                                         negative_samples,
                                                         repeat=False,
                                                         shuffle=False)
        trainer.extend(extensions.Evaluator(
            test_iter, model, device=gpu,
            converter=abae.iterator.concat_examples_ns),
            trigger=(500, 'iteration'))
        trainer.extend(abae.evaluator.TopicMatchEvaluator(
            test_iter, model, label_dict=label_dict, device=gpu,
            converter=abae.iterator.concat_examples_ns),
            trigger=(500, 'iteration'))
    else:
        logger.info("train: %d", len(train))
    logger.info("With labels: %s", json.dumps(label_dict))

    # Take a snapshot for each specified epoch
    trigger = (epoch, 'epoch') if frequency == -1 else (frequency, 'iteration')
    trainer.extend(extensions.snapshot(), trigger=trigger)

    if gpu < 0:
        # ParameterStatistics does not work with GPU as of chainer 2.x
        # https://github.com/chainer/chainer/issues/3027
        trainer.extend(
            extensions.ParameterStatistics(model, trigger=(10, 'iteration')))

    # Write a log of evaluation statistics every 10 iterations
    trainer.extend(extensions.LogReport(trigger=(10, 'iteration')))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(resume, trainer)

    # Run the training
    trainer.run()

    # Save final model (without trainer)
    model.save(os.path.join(out, 'trained_model'))
    # json.dump emits text (str), so the file must be opened in text mode;
    # binary mode raises TypeError on Python 3.
    with open(os.path.join(out, 'vocab.json'), 'w') as fout:
        json.dump(vocab, fout)