# =============================================================================
# Single-GPU training script: AEFINetBN on the General 100 dataset.
# =============================================================================
import argparse
import json
import os
from os import path

import chainer
from chainer import training
from chainer.training import extensions
from chainerui.extensions import CommandsExtension
from chainerui.utils import save_args

# Project-local modules: these import paths are assumptions inferred from the
# names used below (ds, N, ROOT_PATH) and may differ in the actual repository.
import datasets as ds
import network as N
from config import ROOT_PATH


def main():
    '''Main function; entry point of the training script.'''
    # command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.001,
                        help='Initial learning rate (lr for SGD, alpha for Adam)')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--iter_parallel', '-p', action='store_true',
                        default=False,
                        help='Load dataset from disk with parallel iterators')
    parser.add_argument('--opt', '-o', type=str,
                        choices=('adam', 'sgd'), default='adam')
    parser.add_argument('--fsize', '-f', type=int, default=5)
    parser.add_argument('--ch', '-c', type=int, default=4)
    args = parser.parse_args()

    # print learning parameters
    print("-=Learning Parameter=-")
    print("# Max Epochs: {}".format(args.epoch))
    print("# Batch Size: {}".format(args.batchsize))
    print("# Learning Rate: {}".format(args.learnrate))
    print("# Optimizer Method: {}".format(args.opt))
    print("# Filter Size: {}".format(args.fsize))
    print("# Channel Scale: {}".format(args.ch))
    print('# Train Dataset: General 100')
    if args.iter_parallel:
        print("# Data iterators load in parallel")
    print("\n")

    # save directory
    model_dir_name = 'AEFINet_BN_opt_{}_ch_{}_fsize_{}'.format(
        args.opt, args.ch, args.fsize)
    outdir = path.join(ROOT_PATH, 'results', 'FI', 'AEFINet', model_dir_name)
    if not path.exists(outdir):
        os.makedirs(outdir)
    with open(path.join(outdir, 'arg_param.txt'), 'w') as f:
        for k, v in args.__dict__.items():
            f.write('{}:{}\n'.format(k, v))

    # load dataset
    print('# loading dataset (General100_train, General100_test) ...')
    if args.iter_parallel:
        train = ds.SequenceDataset(dataset='train')
        test = ds.SequenceDataset(dataset='test')
    else:
        train = ds.SequenceDatasetOnMem(dataset='train')
        test = ds.SequenceDatasetOnMem(dataset='test')

    # prepare model
    model = N.GenEvaluator(N.AEFINetBN(f_size=args.fsize, ch=args.ch))
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # setup optimizer
    if args.opt == 'adam':
        optimizer = chainer.optimizers.Adam(alpha=args.learnrate)
    elif args.opt == 'sgd':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate,
                                                   momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    # setup iterators
    if args.iter_parallel:
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=8)
        test_iter = chainer.iterators.MultiprocessIterator(
            test, args.batchsize, repeat=False, shuffle=False, n_processes=8)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(
            test, args.batchsize, repeat=False, shuffle=False)

    # setup trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)

    # evaluate on test data
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    # dump computational graph of the loss
    trainer.extend(extensions.dump_graph('main/loss'))
    # learning-rate decay
    if args.opt == 'sgd':
        trainer.extend(extensions.ExponentialShift('lr', 0.1),
                       trigger=(100, 'epoch'))
    elif args.opt == 'adam':
        trainer.extend(extensions.ExponentialShift('alpha', 0.1),
                       trigger=(50, 'epoch'))
    # save snapshots
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_snapshot_{.updater.epoch}'), trigger=(10, 'epoch'))
    # log report
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))
    # plot loss graph
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch', file_name='loss.png'))
    # plot PSNR graph
    trainer.extend(
        extensions.PlotReport(['main/PSNR', 'validation/main/PSNR'],
                              'epoch', file_name='PSNR.png'))
    # print training status
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/PSNR',
            'validation/main/PSNR', 'lr', 'elapsed_time'
        ]))
    # print progress bar
    trainer.extend(extensions.ProgressBar())

    # [ChainerUI] enable to send commands from ChainerUI
    trainer.extend(CommandsExtension())
    # [ChainerUI] save 'args' to show experimental conditions
    save_args(args, outdir)

    if args.resume:
        # resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # start training
    trainer.run()

    # save final model
    model_outdir = path.join(ROOT_PATH, 'models', model_dir_name)
    if not path.exists(model_outdir):
        os.makedirs(model_outdir)
    model_name = 'AEFINet_opt_{}_ch_{}_fsize_{}.npz'.format(
        args.opt, args.ch, args.fsize)
    chainer.serializers.save_npz(path.join(model_outdir, model_name), model)

    # record the architecture parameters alongside the weights
    model_parameter = {
        'name': 'AEFINet',
        'parameter': {
            'f_size': args.fsize,
            'ch': args.ch
        }
    }
    with open(path.join(model_outdir, 'model_parameter.json'), 'w') as f:
        json.dump(model_parameter, f)


if __name__ == '__main__':
    main()
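# -----------------------------------------------------------------------------
# The GenEvaluator wrapper used above lives in the project's network module
# (imported as N) and is not shown in this file. The sketch below is a minimal
# illustration, assuming it wraps the interpolation network, computes an MSE
# loss against the ground-truth frame, and reports 'loss' and 'PSNR' -- the
# observation names that the LogReport/PlotReport/PrintReport extensions above
# subscribe to. The loss function and PSNR normalization are assumptions, not
# the project's actual implementation.
# -----------------------------------------------------------------------------
import numpy as np
import chainer
import chainer.functions as F


class GenEvaluatorSketch(chainer.Chain):

    def __init__(self, generator):
        super(GenEvaluatorSketch, self).__init__()
        with self.init_scope():
            self.generator = generator

    def __call__(self, x, t):
        # predict the intermediate frame and score it against the ground truth
        y = self.generator(x)
        loss = F.mean_squared_error(y, t)
        # PSNR in dB, assuming pixel values are normalized to [0, 1]
        psnr = 10 * np.log10(1.0 / float(loss.data))
        chainer.report({'loss': loss, 'PSNR': psnr}, self)
        return loss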
# =============================================================================
# Multi-GPU (data-parallel) training script: AEFINetConcat on UCF101.
# =============================================================================
import argparse
from os import path

import chainer
from chainer import training
from chainer.training import extensions
from chainerui.extensions import CommandsExtension
from chainerui.utils import save_args

# Project-local modules: import paths assumed, as in the single-GPU script.
import datasets
import network as N
import util
from config import ROOT_PATH


def main():
    '''Main function; entry point of the multi-GPU training script.'''
    # command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.001,
                        help='Initial learning rate (lr for SGD, alpha for Adam)')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu0', '-g', type=int, default=0,
                        help='First GPU ID (negative value indicates CPU)')
    parser.add_argument('--gpu1', '-G', type=int, default=2,
                        help='Second GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--iter_parallel', '-p', action='store_true',
                        default=False,
                        help='Load dataset from disk with parallel iterators')
    parser.add_argument('--opt', '-o', type=str,
                        choices=('adam', 'sgd'), default='adam')
    parser.add_argument('--fsize', '-f', type=int, default=5)
    parser.add_argument('--ch', '-c', type=int, default=4)
    args = parser.parse_args()

    # print learning parameters
    print("-=Learning Parameter=-")
    print("# Max Epochs: {}".format(args.epoch))
    print("# Batch Size: {}".format(args.batchsize))
    print("# Learning Rate: {}".format(args.learnrate))
    print("# Optimizer Method: {}".format(args.opt))
    print("# Filter Size: {}".format(args.fsize))
    print("# Channel Scale: {}".format(args.ch))
    print('# Train Dataset: UCF101')
    if args.iter_parallel:
        print("# Data iterators load in parallel")
    print("\n")

    # make result directory
    network_name = 'AEFINetConcat'
    model_name = 'AEFINet_Test_opt_{}_ch_{}_fsize_{}'.format(
        args.opt, args.ch, args.fsize)
    outdir = path.join(ROOT_PATH, 'results', 'FI', 'AEFINet', model_name)
    util.make_result_dir(args, outdir)

    # load dataset
    if args.iter_parallel:
        train = datasets.SequenceDataset(
            dataset='UCF101_train_size64_frame3_group10_max100_p')
        test = datasets.SequenceDataset(
            dataset='UCF101_test_size64_frame3_group25_max5_p')
    else:
        train = datasets.SequenceDatasetOnMem(
            dataset='UCF101_train_size64_frame3_group10_max100_p')
        test = datasets.SequenceDatasetOnMem(
            dataset='UCF101_test_size64_frame3_group25_max5_p')

    # prepare model (ParallelUpdater copies it to both devices)
    chainer.cuda.get_device_from_id(args.gpu0).use()
    model = N.GenEvaluator(N.AEFINetConcat(f_size=args.fsize, ch=args.ch))

    # setup optimizer
    if args.opt == 'adam':
        optimizer = chainer.optimizers.Adam(alpha=args.learnrate)
    elif args.opt == 'sgd':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate,
                                                   momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    # setup iterators
    if args.iter_parallel:
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=8)
        test_iter = chainer.iterators.MultiprocessIterator(
            test, args.batchsize, repeat=False, shuffle=False, n_processes=8)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(
            test, args.batchsize, repeat=False, shuffle=False)

    # setup trainer (data-parallel updates across the two GPUs)
    updater = training.ParallelUpdater(
        train_iter,
        optimizer,
        devices={'main': args.gpu0, 'second': args.gpu1},
    )
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)

    # evaluate on test data
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu0))
    # dump computational graph of the loss
    trainer.extend(extensions.dump_graph('main/loss'))
    # learning-rate decay
    if args.opt == 'sgd':
        trainer.extend(extensions.ExponentialShift('lr', 0.1),
                       trigger=(100, 'epoch'))
    elif args.opt == 'adam':
        trainer.extend(extensions.ExponentialShift('alpha', 0.1),
                       trigger=(100, 'epoch'))
    # save snapshots
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_snapshot_{.updater.epoch}'), trigger=(10, 'epoch'))
    # log report
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))
    # plot loss graph
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch', file_name='loss.png'))
    # plot PSNR graph
    trainer.extend(
        extensions.PlotReport(['main/PSNR', 'validation/main/PSNR'],
                              'epoch', file_name='PSNR.png'))
    # print training status
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/PSNR',
            'validation/main/PSNR', 'lr', 'elapsed_time'
        ]))
    # print progress bar
    trainer.extend(extensions.ProgressBar())

    # [ChainerUI] enable to send commands from ChainerUI
    trainer.extend(CommandsExtension())
    # [ChainerUI] save 'args' to show experimental conditions
    save_args(args, outdir)

    if args.resume:
        # resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # start training
    trainer.run()

    # save final model
    util.save_trained_model(model_name, model, network_name,
                            f_size=args.fsize, ch=args.ch)


if __name__ == '__main__':
    main()
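# -----------------------------------------------------------------------------
# util.make_result_dir and util.save_trained_model are project helpers that
# are not shown here. The sketch below illustrates save_trained_model under
# the assumption that it mirrors the inline saving logic of the single-GPU
# script: serialize the weights to .npz and write a JSON description of the
# architecture next to them. The name and signature match the call above; the
# body is an assumption.
# -----------------------------------------------------------------------------
import json
import os
from os import path

import chainer

from config import ROOT_PATH  # assumed import path, as in the scripts above


def save_trained_model(model_name, model, network_name, **parameter):
    '''Persist trained weights plus a JSON file describing the network.'''
    model_outdir = path.join(ROOT_PATH, 'models', model_name)
    if not path.exists(model_outdir):
        os.makedirs(model_outdir)
    # serialize the trained weights
    chainer.serializers.save_npz(
        path.join(model_outdir, model_name + '.npz'), model)
    # record the architecture so the model can be rebuilt at load time
    model_parameter = {'name': network_name, 'parameter': parameter}
    with open(path.join(model_outdir, 'model_parameter.json'), 'w') as f:
        json.dump(model_parameter, f)

# Example invocation of the multi-GPU script (file name hypothetical):
#   python train_aefinet_concat.py -g 0 -G 1 --opt adam --iter_parallel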