def test_save_args_dict(self):
    """save_args must dump a plain dict (with nested dict/list values) as JSON to <out>/args."""
    result_dir = os.path.join(self._dir, 'result')
    conditions = {
        'int': 1,
        'float': 0.1,
        'str': 'foo',
        'inner': {'int': 1},
        'array': ['boo'],
    }
    save_args(conditions, result_dir)

    dumped_path = os.path.join(result_dir, 'args')
    assert os.path.exists(dumped_path)
    with open(dumped_path) as f:
        loaded = json.load(f)

    # dict values survive the round-trip unchanged
    assert len(loaded) == 5
    expected = {
        'int': 1,
        'float': 0.1,
        'str': 'foo',
        'inner': {'int': 1},
        'array': ['boo'],
    }
    for key, value in expected.items():
        assert loaded[key] == value
def test_save_args_argparse(self):
    """save_args must stringify argparse.Namespace values before dumping them to <out>/args."""
    result_dir = os.path.join(self._dir, 'result2')
    parsed = self._create_parser().parse_args(
        ['-s', 'foo', '-i', '-1', '-a', '0', '100'])
    save_args(parsed, result_dir)

    dumped_path = os.path.join(result_dir, 'args')
    assert os.path.exists(dumped_path)
    with open(dumped_path) as f:
        loaded = json.load(f)

    # non-string values (int, list) are serialized via str()
    assert len(loaded) == 3
    assert loaded['i'] == str(-1)
    assert loaded['s'] == 'foo'
    assert loaded['a'] == "[0, 100]"
def prep_Trainer(_model, _updater, result_dir, _test_iter, cnf="./_json/configure001.json", _args=None, logger=None):
    """Build a chainer ``training.Trainer`` wired with evaluation, logging and plotting extensions.

    Args:
        _model: chainer Link to evaluate.
        _updater: chainer Updater driving the training loop.
        result_dir: output directory for logs, plots and snapshots.
        _test_iter: iterator over the evaluation dataset.
        cnf: path to the JSON config; its "Trainer" section supplies epoch/device/console.
        _args: optional namespace persisted via chainerui's save_args (may be None).
        logger: optional project logger with a ``log(_tag=..., _msg=...)`` method.

    Returns:
        The configured ``training.Trainer``.
    """
    from chainerui.extensions import CommandsExtension
    from chainerui.utils import save_args
    from chainer import training
    from chainer.training import extensions

    trainer_cnf = read_cnf(cnf, q="Trainer")
    _epoch = trainer_cnf["epoch"]
    _device = trainer_cnf["device"]
    _console = trainer_cnf["console"]
    # BUG FIX: logger defaults to None; guard before calling it.
    if logger is not None:
        logger.log(_tag="Trainer", _msg="Setting accepted.")

    trainer = training.Trainer(_updater, (_epoch, 'epoch'), out=result_dir)
    trainer.extend(extensions.Evaluator(_test_iter, _model, device=_device))
    trainer.extend(
        extensions.dump_graph(root_name="main/loss", out_name="predictor.dot"))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PlotReport(["main/loss", "validation/main/loss"],
                              "epoch", file_name="loss.png"))
    trainer.extend(
        extensions.PlotReport(["main/accuracy", "validation/main/accuracy"],
                              "epoch", file_name="accuracy.png"))
    if _console:
        # console-only reporting: tabular report plus a per-iteration progress bar
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss',
                "main/accuracy", "validation/main/accuracy", "elapsed_time"
            ]))
        trainer.extend(extensions.ProgressBar(update_interval=1))
    trainer.extend(extensions.observe_lr())
    trainer.extend(CommandsExtension())
    if _args:
        # BUG FIX: was save_args(args, result_dir) — `args` is undefined here;
        # the parameter is `_args`, so the old code raised NameError whenever
        # _args was truthy.
        save_args(_args, result_dir)
    return trainer
def main():
    """Entry point: parse options, build datasets/models/optimisers and train the AE/cycleGAN.

    All heavy objects (datasets, encoders/decoders/discriminators, optimisers,
    updater, trainer) are assembled here; results go to a timestamped directory.
    """
    args = arguments()
    # timestamped output directory so repeated runs do not clobber each other
    out = os.path.join(args.out, dt.now().strftime('%m%d_%H%M_AE'))
    print(args)
    print(out)
    save_args(args, out)
    # resolve CLI string choices into the actual dtype / activation objects
    args.dtype = dtypes[args.dtype]
    args.dis_activation = activation[args.dis_activation]
    args.gen_activation = activation[args.gen_activation]
    args.gen_out_activation = activation[args.gen_out_activation]
    args.gen_nblock = args.gen_nblock // 2  # to match ordinary cycleGAN
    # DICOM volumes and ordinary images are handled by different dataset classes
    if args.imgtype=="dcm":
        from dataset_dicom import DatasetOutMem as Dataset
    else:
        from dataset_jpg import DatasetOutMem as Dataset
    # NOTE(review): execution continues even without CUDA; the warning is informational only
    if not chainer.cuda.available:
        print("CUDA required")
    if len(args.gpu)==1 and args.gpu[0] >= 0:
        chainer.cuda.get_device_from_id(args.gpu[0]).use()
    # Enable autotuner of cuDNN
    chainer.config.autotune = True
    chainer.config.dtype = args.dtype
    chainer.print_runtime_info()
    # Turn off type check
    # chainer.config.type_check = False
    # print('Chainer version: ', chainer.__version__)
    # print('GPU availability:', chainer.cuda.available)
    # print('cuDNN availablility:', chainer.cuda.cudnn_enabled)

    ## dataset iterator
    print("Setting up data iterators...")
    # train sets use the requested random translation; test sets use random=0 (deterministic)
    train_A_dataset = Dataset(
        path=os.path.join(args.root, 'trainA'), baseA=args.HU_base,
        rangeA=args.HU_range, slice_range=args.slice_range,
        crop=(args.crop_height,args.crop_width),random=args.random_translate,
        forceSpacing=0, imgtype=args.imgtype, dtype=args.dtype)
    train_B_dataset = Dataset(
        path=os.path.join(args.root, 'trainB'), baseA=args.HU_base,
        rangeA=args.HU_range, slice_range=args.slice_range,
        crop=(args.crop_height,args.crop_width), random=args.random_translate,
        forceSpacing=args.forceSpacing, imgtype=args.imgtype, dtype=args.dtype)
    test_A_dataset = Dataset(
        path=os.path.join(args.root, 'testA'), baseA=args.HU_base,
        rangeA=args.HU_range, slice_range=args.slice_range,
        crop=(args.crop_height,args.crop_width), random=0,
        forceSpacing=0, imgtype=args.imgtype, dtype=args.dtype)
    test_B_dataset = Dataset(
        path=os.path.join(args.root, 'testB'), baseA=args.HU_base,
        rangeA=args.HU_range, slice_range=args.slice_range,
        crop=(args.crop_height,args.crop_width), random=0,
        forceSpacing=args.forceSpacing, imgtype=args.imgtype, dtype=args.dtype)
    args.ch = train_A_dataset.ch
    test_A_iter = chainer.iterators.SerialIterator(test_A_dataset, args.nvis_A, shuffle=False)
    test_B_iter = chainer.iterators.SerialIterator(test_B_dataset, args.nvis_B, shuffle=False)
    # multiprocess loading only pays off for batch sizes > 1
    if args.batch_size > 1:
        train_A_iter = chainer.iterators.MultiprocessIterator(
            train_A_dataset, args.batch_size, n_processes=3,
            shuffle=not args.conditional_discriminator)
        train_B_iter = chainer.iterators.MultiprocessIterator(
            train_B_dataset, args.batch_size, n_processes=3,
            shuffle=not args.conditional_discriminator)
    else:
        train_A_iter = chainer.iterators.SerialIterator(
            train_A_dataset, args.batch_size,
            shuffle=not args.conditional_discriminator)
        train_B_iter = chainer.iterators.SerialIterator(
            train_B_dataset, args.batch_size,
            shuffle=not args.conditional_discriminator)

    # setup models
    enc_x = net.Encoder(args)
    enc_y = net.Encoder(args)
    dec_x = net.Decoder(args)
    dec_y = net.Decoder(args)
    dis_x = net.Discriminator(args)
    dis_y = net.Discriminator(args)
    dis_z = net.Discriminator(args)
    models = {'enc_x': enc_x, 'enc_y': enc_y, 'dec_x': dec_x, 'dec_y': dec_y,
              'dis_x': dis_x, 'dis_y': dis_y, 'dis_z': dis_z}
    optimiser_files = []
    ## load learnt models
    if args.load_models:
        # sibling model filenames are derived from the 'enc_x' filename by substitution
        for e in models:
            m = args.load_models.replace('enc_x',e)
            try:
                serializers.load_npz(m, models[e])
                print('model loaded: {}'.format(m))
            except:
                # best-effort: a missing/incompatible file is reported but not fatal
                print("couldn't load {}".format(m))
                pass
            optimiser_files.append(m.replace(e,'opt_'+e).replace('dis_',''))
    # select GPU
    if len(args.gpu) == 1:
        for e in models:
            models[e].to_gpu()
        print('use gpu {}, cuDNN {}'.format(args.gpu, chainer.cuda.cudnn_enabled))
    else:
        print("mandatory GPU use: currently only a single GPU can be used")
        exit()

    # Setup optimisers
    def make_optimizer(model, alpha=0.0002, beta1=0.5):
        """Adam with an fp16-safe epsilon and optional L1/L2 weight-decay hook."""
        # larger eps for fp16 to avoid underflow in Adam's denominator
        eps = 1e-5 if args.dtype==np.float16 else 1e-8
        optimizer = chainer.optimizers.Adam(alpha=alpha, beta1=beta1, eps=eps)
        optimizer.setup(model)
        if args.weight_decay>0:
            if args.weight_decay_norm =='l2':
                optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight_decay))
            else:
                optimizer.add_hook(chainer.optimizer_hooks.Lasso(args.weight_decay))
        return optimizer

    opt_enc_x = make_optimizer(enc_x, alpha=args.learning_rate_g)
    opt_dec_x = make_optimizer(dec_x, alpha=args.learning_rate_g)
    opt_enc_y = make_optimizer(enc_y, alpha=args.learning_rate_g)
    opt_dec_y = make_optimizer(dec_y, alpha=args.learning_rate_g)
    opt_x = make_optimizer(dis_x, alpha=args.learning_rate_d)
    opt_y = make_optimizer(dis_y, alpha=args.learning_rate_d)
    opt_z = make_optimizer(dis_z, alpha=args.learning_rate_d)
    optimizers = {'opt_enc_x': opt_enc_x,'opt_dec_x': opt_dec_x,
                  'opt_enc_y': opt_enc_y,'opt_dec_y': opt_dec_y,
                  'opt_x': opt_x,'opt_y': opt_y,'opt_z': opt_z}
    if args.load_optimizer:
        # NOTE(review): pairing relies on dict insertion order matching the order
        # optimiser_files was filled above — verify if either dict changes.
        for (m,e) in zip(optimiser_files,optimizers):
            if m:
                try:
                    serializers.load_npz(m, optimizers[e])
                    print('optimiser loaded: {}'.format(m))
                except:
                    print("couldn't load {}".format(m))
                    pass

    # Set up an updater: TODO: multi gpu updater
    print("Preparing updater...")
    updater = Updater(
        models=(enc_x,dec_x,enc_y,dec_y, dis_x, dis_y, dis_z),
        iterator={
            'main': train_A_iter,
            'train_B': train_B_iter,
        },
        optimizer=optimizers,
        converter=convert.ConcatWithAsyncTransfer(),
        device=args.gpu[0],
        params={
            'args': args
        })
    # default snapshot interval: once at the end of the full schedule
    if args.snapinterval<0:
        args.snapinterval = args.lrdecay_start+args.lrdecay_period
    log_interval = (200, 'iteration')
    model_save_interval = (args.snapinterval, 'epoch')
    vis_interval = (args.vis_freq, 'iteration')
    plot_interval = (500, 'iteration')

    # Set up a trainer
    print("Preparing trainer...")
    trainer = training.Trainer(updater, (args.lrdecay_start + args.lrdecay_period, 'epoch'), out=out)
    # periodic snapshots of every model and optimiser
    for e in models:
        trainer.extend(extensions.snapshot_object(
            models[e], e+'{.updater.epoch}.npz'), trigger=model_save_interval)
    for e in optimizers:
        trainer.extend(extensions.snapshot_object(
            optimizers[e], e+'{.updater.epoch}.npz'), trigger=model_save_interval)

    # reported keys, grouped by loss family for separate plots
    log_keys = ['epoch', 'iteration']
    log_keys_cycle = ['opt_enc_x/loss_cycle', 'opt_enc_y/loss_cycle',
                      'opt_dec_x/loss_cycle', 'opt_dec_y/loss_cycle', 'myval/cycle_y_l1']
    log_keys_d = ['opt_x/loss_real','opt_x/loss_fake','opt_y/loss_real',
                  'opt_y/loss_fake','opt_z/loss_x','opt_z/loss_y']
    log_keys_adv = ['opt_enc_y/loss_adv','opt_dec_y/loss_adv',
                    'opt_enc_x/loss_adv','opt_dec_x/loss_adv']
    log_keys.extend([ 'opt_dec_y/loss_id'])
    log_keys.extend([ 'opt_enc_x/loss_reg','opt_enc_y/loss_reg',
                      'opt_dec_x/loss_air','opt_dec_y/loss_air', 'opt_dec_y/loss_tv'])
    log_keys_d.extend(['opt_x/loss_gp','opt_y/loss_gp'])
    log_keys_all = log_keys+log_keys_d+log_keys_adv+log_keys_cycle
    trainer.extend(extensions.LogReport(keys=log_keys_all, trigger=log_interval))
    trainer.extend(extensions.PrintReport(log_keys_all), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=20))
    trainer.extend(CommandsExtension())
    ## to dump graph, set -lix 1 --warmup 0
    # trainer.extend(extensions.dump_graph('opt_g/loss_id', out_name='gen.dot'))
    # trainer.extend(extensions.dump_graph('opt_x/loss', out_name='dis.dot'))
    if extensions.PlotReport.available():
        # log_keys[2:] skips 'epoch'/'iteration' which are not losses
        trainer.extend(extensions.PlotReport(log_keys[2:], 'iteration',trigger=plot_interval, file_name='loss.png'))
        trainer.extend(extensions.PlotReport(log_keys_d, 'iteration', trigger=plot_interval, file_name='loss_d.png'))
        trainer.extend(extensions.PlotReport(log_keys_adv, 'iteration', trigger=plot_interval, file_name='loss_adv.png'))
        trainer.extend(extensions.PlotReport(log_keys_cycle, 'iteration', trigger=plot_interval, file_name='loss_cyc.png'))

    ## output filenames of training dataset
    with open(os.path.join(out, 'trainA.txt'),'w') as output:
        output.writelines("\n".join(train_A_dataset.ids))
    with open(os.path.join(out, 'trainB.txt'),'w') as output:
        output.writelines("\n".join(train_B_dataset.ids))

    # archive the scripts for reproducibility of this run
    rundir = os.path.dirname(os.path.realpath(__file__))
    import zipfile
    with zipfile.ZipFile(os.path.join(out,'script.zip'), 'w', compression=zipfile.ZIP_DEFLATED) as new_zip:
        for f in ['trainAE.py','net.py','updaterAE.py','consts.py','losses.py','arguments.py','convert.py']:
            new_zip.write(os.path.join(rundir,f),arcname=f)

    ## visualisation
    vis_folder = os.path.join(out, "vis")
    if not os.path.exists(vis_folder):
        os.makedirs(vis_folder)
    # trainer.extend(visualize( (enc_x, enc_y, dec_y), vis_folder, test_A_iter, test_B_iter),trigger=(1, 'epoch'))
    trainer.extend(VisEvaluator({"main":test_A_iter, "testB":test_B_iter},
                                {"enc_x":enc_x, "enc_y":enc_y,"dec_x":dec_x,"dec_y":dec_y},
                                params={'vis_out': vis_folder, 'single_encoder': args.single_encoder},
                                device=args.gpu[0]),trigger=vis_interval)

    # Run the training
    trainer.run()
def main():
    """Train a dual-source Seq2seq model, with optional validation/test BLEU and sample translations."""
    chainer.set_debug(True)
    parser = get_parser()
    args = parser.parse_args()
    reset_seed(args.seed)
    #load vocabulary
    source0_ids = load_vocabulary(args.SOURCE_VOCAB0)
    source1_ids = load_vocabulary(args.SOURCE_VOCAB1)
    target_ids = load_vocabulary(args.TARGET_VOCAB)
    print('Source vocabulary size: %d' % len(source0_ids))
    # NOTE(review): this second line reuses the "Source" label for source1 —
    # probably intended to be distinguishable from the first.
    print('Source vocabulary size: %d' % len(source1_ids))
    print('Target vocabulary size: %d' % len(target_ids))
    train_data = make_data_tuple(source0=(source0_ids, args.SOURCE0),
                                 source1=(source1_ids, args.SOURCE1),
                                 target=(target_ids, args.TARGET))
    # inverse maps (id -> word) used for printing sample translations
    source0_words = {i: w for w, i in source0_ids.items()}
    source1_words = {i: w for w, i in source1_ids.items()}
    target_words = {i: w for w, i in target_ids.items()}

    # Setup model
    model = Seq2seq(args.layer, len(source0_ids), len(source1_ids),
                    len(target_ids), args.unit)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device(args.gpu).use()
        model.to_gpu(args.gpu)

    # Setup optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(args.l2))

    # Setup iterator
    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)

    # Setup updater and trainer
    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                converter=convert, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(
        extensions.LogReport(trigger=(args.log_interval, 'iteration')))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/perp', 'validation/main/perp', 'validation/main/bleu',
        'test/main/bleu', 'elapsed_time'
    ]), trigger=(args.log_interval, 'iteration'))

    # validation pipeline is optional: all three file arguments must be given
    if args.validation_source0 and args.validation_source1 and args.validation_target:
        valid_data = make_data_tuple(
            source0=(source0_ids, args.validation_source0),
            source1=(source1_ids, args.validation_source1),
            target=(target_ids, args.validation_target))

        @chainer.training.make_extension()
        def translate(trainer):
            # print one randomly chosen validation example with its model translation
            source0, source1, target = valid_data[numpy.random.choice(
                len(valid_data))]
            result = model.translate([model.xp.array(source0)],
                                     [model.xp.array(source1)])[0]
            source0_sentence = ' '.join([source0_words[x] for x in source0])
            source1_sentence = ' '.join([source1_words[x] for x in source1])
            target_sentence = ' '.join([target_words[y] for y in target])
            result_sentence = ' '.join([target_words[y] for y in result])
            print('# source0 : ' + source0_sentence)
            print('# source1 : ' + source1_sentence)
            print('# result : ' + result_sentence)
            print('# expect : ' + target_sentence)

        trainer.extend(translate, trigger=(args.validation_interval, 'iteration'))
        trainer.extend(CalculateBleu(model, valid_data, 'validation/main/bleu',
                                     device=args.gpu),
                       trigger=(args.validation_interval, 'iteration'))
        dev_iter = chainer.iterators.SerialIterator(valid_data, args.batchsize,
                                                    repeat=False, shuffle=False)
        dev_eval = extensions.Evaluator(dev_iter, model, device=args.gpu,
                                        converter=convert)
        dev_eval.name = 'valid'
        trainer.extend(dev_eval, trigger=(args.validation_interval, 'iteration'))

    # test BLEU is likewise optional
    if args.test_source0 and args.test_source1 and args.test_target:
        test_data = make_data_tuple(source0=(source0_ids, args.test_source0),
                                    source1=(source1_ids, args.test_source1),
                                    target=(target_ids, args.test_target))
        trainer.extend(CalculateBleu(model, test_data, 'test/main/bleu',
                                     device=args.gpu),
                       trigger=(args.test_interval, 'iteration'))

    print('start training')
    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)
    save_args(args, args.out)
    trainer.run()
    if args.save:
        # Save a snapshot
        chainer.serializers.save_npz(args.out + "/trainer.npz", trainer)
        chainer.serializers.save_npz(args.out + "/model.npz", model)
def main():
    """CLI entry point: parse options, build the generator and datasets, then train or test.

    Default mode is training; --test_on_test / --test_on_valid switch to evaluation.
    """
    parser = argparse.ArgumentParser(
        description=
        'Example: Uncertainty estimates with adversarial training in image synthesis',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--data_root', '-d', type=str, default='./preprocessed',
                        help='Directory to dataset')
    parser.add_argument('--batchsize', '-b', type=int, default=5,
                        help='Number of images in each mini-batch')
    parser.add_argument('--iteration', '-i', type=int, default=200000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, nargs='+', default=[0],
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='logs',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--valid_augment', action='store_true',
                        help='Enable data augmentation during validation')
    parser.add_argument('--valid_split_ratio', type=float, default=0.1,
                        help='Ratio of validation data to training data')
    parser.add_argument('--lr', type=float, default=4e-4,
                        help='Learning rate')
    parser.add_argument('--alpha', type=float, default=50.,
                        help='Weight of conditional loss')
    parser.add_argument(
        '--beta', type=float, default=0.5,
        help='Exponential decay rate of the first order moment in Adam')
    parser.add_argument('--decay', type=float, default=-1,
                        help='Weight of L2 regularization')
    parser.add_argument('--mc_iteration', type=int, default=15,
                        help='Number of iteration of MCMC')
    parser.add_argument('--pinfall', type=int, default=-1,
                        help='Countdown for early stopping of training.')
    parser.add_argument(
        '--freeze_upconv', action='store_true',
        help=
        'Disables updating the up-convolutional weights. If weights are initialized with \
bilinear kernels, up-conv acts as bilinear upsampler.'
    )
    parser.add_argument('--test_on_test', action='store_true',
                        help='Switch to the testing phase on test dataset')
    parser.add_argument('--test_on_valid', action='store_true',
                        help='Switch to the testing phase on valid dataset')
    parser.add_argument('--seed', type=int, default=0,
                        help='Fix the random seed')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('')

    # setup output directory
    os.makedirs(args.out, exist_ok=True)

    # NOTE: ad-hoc
    normalizer = get_normalizer()
    augmentor = get_augmentor()

    # setup a generator (built under a fixed seed for reproducible initialization)
    with fixed_seed(args.seed, strict=False):
        generator = build_generator()
        if args.freeze_upconv:
            generator.freeze_layers(name='upconv', recursive=True, verbose=True)

    # setup dataset
    train, valid, test = get_dataset(args.data_root, args.valid_split_ratio,
                                     args.valid_augment, normalizer, augmentor)

    # run
    if args.test_on_test:
        # NOTE(review): raising here makes the following test_phase call
        # unreachable — the test-on-test path is intentionally disabled.
        raise RuntimeError(
            'This example is under construction. Please tune the hyperparameters first..'
        )
        test_phase(generator, test, args)
    elif args.test_on_valid:
        test_phase(generator, valid, args)
    else:
        # persist the full run configuration next to the results before training
        save_args(args, args.out)
        generator.save_args(os.path.join(args.out, 'model.json'))
        normalizer.summary(os.path.join(args.out, 'norm.json'))
        augmentor.summary(os.path.join(args.out, 'augment.json'))
        train_phase(generator, train, valid, args)
def arguments():
    """Parse and post-process CLI options for the pix2pix training script.

    Returns the argparse namespace with string choices resolved to concrete
    objects (dtype, activations), a timestamped output directory, and derived
    learning-rate-decay scheduling fields.
    """
    parser = argparse.ArgumentParser(description='chainer implementation of pix2pix')
    parser.add_argument('--train', '-t', help='text file containing image pair filenames for training')
    parser.add_argument('--val', help='text file containing image pair filenames for validation')
    parser.add_argument('--imgtype', '-it', default="jpg", help="image file type (file extension)")
    parser.add_argument('--argfile', '-a', help="specify args file to read")
    parser.add_argument('--from_col', '-c1', type=int, nargs="*", default=[0],
                        help='column index of FromImage')
    parser.add_argument('--to_col', '-c2', type=int, nargs="*", default=[1],
                        help='column index of ToImage')
    parser.add_argument('--batch_size', '-b', type=int, default=1,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=400,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--root', '-R', default='.',
                        help='directory containing image files')
    parser.add_argument('--learning_rate', '-lr', type=float, default=1e-4)
    parser.add_argument('--snapinterval', '-si', type=int, default=-1,
                        help='take snapshot every this epoch')
    parser.add_argument('--display_interval', type=int, default=500,
                        help='Interval of displaying log to console')
    parser.add_argument('--nvis', type=int, default=3,
                        help='number of images in visualisation after each epoch')
    parser.add_argument('--crop_width', '-cw', type=int, default=128,
                        help='better to have a value divisible by a large power of two')
    parser.add_argument('--crop_height', '-ch', type=int, default=128,
                        help='better to have a value divisible by a large power of two')
    parser.add_argument('--grey', action='store_true', help='greyscale')
    # loss weights
    parser.add_argument('--lambda_rec_l1', '-l1', type=float, default=1.0)
    parser.add_argument('--lambda_rec_l2', '-l2', type=float, default=0.0)
    parser.add_argument('--lambda_dis', '-ldis', type=float, default=0.1)
    parser.add_argument('--lambda_tv', '-ltv', type=float, default=0.0)
    parser.add_argument('--lambda_mispair', '-lm', type=float, default=1.0)
    parser.add_argument('--tv_tau', '-tt', type=float, default=1e-3,
                        help='smoothing parameter for total variation')
    parser.add_argument('--load_optimizer', '-op', action='store_true',
                        help='load optimizer parameters')
    parser.add_argument('--model_gen', '-m', default='')
    parser.add_argument('--model_dis', '-md', default='')
    parser.add_argument('--dtype', '-dt', choices=dtypes.keys(), default='fp32',
                        help='floating point precision')
    parser.add_argument('--eqconv', '-eq', action='store_true',
                        help='Equalised Convolution')
    parser.add_argument('--spconv', '-sp', action='store_true',
                        help='Separable Convolution')
    parser.add_argument('--weight_decay', '-wd', type=float, default=0, #default: 1e-7
                        help='weight decay for regularization')
    parser.add_argument('--weight_decay_norm', '-wn', choices=['l1','l2'], default='l2',
                        help='norm of weight decay for regularization')
    parser.add_argument('--vis_freq', '-vf', type=int, default=4000,
                        help='visualisation frequency in iteration')
    # data augmentation
    # NOTE(review): default=True with no type means any CLI value is a truthy
    # string — this flag cannot actually be turned off from the command line.
    parser.add_argument('--random', '-rt', default=True,
                        help='random flip/crop')
    parser.add_argument('--noise', '-n', type=float, default=0,
                        help='strength of noise injection')
    parser.add_argument('--noise_z', '-nz', type=float, default=0,
                        help='strength of noise injection for the latent variable')
    # discriminator
    parser.add_argument('--dis_activation', '-da', default='lrelu', choices=activation.keys())
    parser.add_argument('--dis_basech', '-db', type=int, default=64,
                        help='the base number of channels in discriminator')
    parser.add_argument('--dis_ksize', '-dk', type=int, default=4, # default 4
                        help='kernel size for patchGAN discriminator')
    parser.add_argument('--dis_ndown', '-dl', type=int, default=3, # default 3
                        help='number of down layers in discriminator')
    parser.add_argument('--dis_down', '-dd', default='down',
                        choices=['down','maxpool','maxpool_res','avgpool','avgpool_res','none'], ## default down
                        help='type of down layers in discriminator')
    parser.add_argument('--dis_sample', '-ds', default='none', ## default down
                        help='type of first conv layer for patchGAN discriminator')
    parser.add_argument('--dis_jitter', type=float, default=0,
                        help='jitter for discriminator label for LSGAN')
    parser.add_argument('--dis_dropout', '-ddo', type=float, default=None,
                        help='dropout ratio for discriminator')
    parser.add_argument('--dis_norm', '-dn', default='instance',
                        choices=['instance', 'batch','batch_aff', 'rbatch', 'fnorm', 'none'])
    # generator: G: A -> B, F: B -> A
    parser.add_argument('--gen_activation', '-ga', default='relu', choices=activation.keys())
    parser.add_argument('--gen_fc_activation', '-gfca', default='relu', choices=activation.keys())
    parser.add_argument('--gen_out_activation', '-go', default='tanh', choices=activation.keys())
    parser.add_argument('--gen_chs', '-gc', type=int, nargs="*", default=[64,128,256,512],
                        help='Number of channels in down layers in generator; the first entry should coincide with the number of channels in the input images')
    parser.add_argument('--gen_fc', '-gfc', type=int, default=0,
                        help='number of fc layers before convolutional layers')
    parser.add_argument('--gen_nblock', '-nb', type=int, default=9, # default 9
                        help='number of residual blocks in generators')
    parser.add_argument('--gen_ksize', '-gk', type=int, default=3, # default 4
                        help='kernel size for generator')
    parser.add_argument('--gen_sample', '-gs', default='none',
                        help='first and last conv layers for generator')
    parser.add_argument('--gen_down', '-gd', default='down',
                        choices=['down','maxpool','maxpool_res','avgpool','avgpool_res','none'],
                        help='down layers in generator')
    parser.add_argument('--gen_up', '-gu', default='resize',
                        choices=['unpool','unpool_res','deconv','pixsh','resize','resize_res','none'],
                        help='up layers in generator')
    parser.add_argument('--gen_dropout', '-gdo', type=float, default=None,
                        help='dropout ratio for generator')
    parser.add_argument('--gen_norm', '-gn', default='instance',
                        choices=['instance', 'batch','batch_aff', 'rbatch', 'fnorm', 'none'])
    parser.add_argument('--unet', '-u', default='none', choices=['none','no_last','with_last'],
                        help='use u-net for generator')
    args = parser.parse_args()
    # timestamped output directory; args are persisted there before any mutation
    args.out = os.path.join(args.out, dt.now().strftime('%m%d_%H%M')+"_cgan")
    save_args(args, args.out)
    print(args)
    print(args.out)
    args.wgan=False
    # resolve string choices into the concrete dtype / activation objects
    args.dtype = dtypes[args.dtype]
    args.dis_activation = activation[args.dis_activation]
    args.gen_activation = activation[args.gen_activation]
    args.gen_fc_activation = activation[args.gen_fc_activation]
    args.gen_out_activation = activation[args.gen_out_activation]
    # learning-rate decay starts halfway through training and runs to the end
    args.lrdecay_start = args.epoch//2
    args.lrdecay_period = args.epoch - args.lrdecay_start
    return(args)
def main():
    """Train the Dancer network on an HDF5 dataset and save the final model.

    Reads configuration from a module-level ``args`` namespace (not parsed here)
    and publishes the trained network via the module-level ``model`` global.
    """
    global model
    logging.info('Training model: {}'.format(args.network))
    # the network definitions are loaded dynamically from user-supplied paths
    net = imp.load_source('Network', args.network)
    audionet = imp.load_source('Network', './models/audio_nets.py')
    model = net.Dancer(args.initOpt, getattr(audionet, args.encoder))
    if args.gpu >= 0:
        if chainer.cuda.available:
            chainer.cuda.get_device_from_id(args.gpu).use()
            chainer.config.cudnn_deterministic = False
            # best-effort lookup of a human-readable GPU name for the log
            # NOTE(review): `platform` is compared against 'Windows'/'Linux' —
            # sys.platform reports 'win32'/'linux', so this presumably comes from
            # platform.system() or a project variable; confirm at the import site.
            if platform == 'Windows':
                import subprocess
                win_cmd = 'wmic path win32_VideoController get Name | findstr /C:"NVIDIA"'
                names_gpu = subprocess.check_output(win_cmd, shell=True).decode("utf-8")
                gpu_name = names_gpu.split('\r')[0]
            elif platform == "Linux":
                import os
                names_gpu = os.popen(
                    'lspci | grep NVIDIA | grep controller').read().split('\n')
                try:
                    _, gpu_name = names_gpu[args.gpu].split('[')
                    gpu_name, _ = gpu_name.split(']')
                except Exception as e:
                    gpu_name = ""
            else:
                raise OSError('OS not supported')
            logging.info('GPU: {} - {}'.format(args.gpu, gpu_name))
            model.to_gpu()
        else:
            logging.warning(
                'No GPU was found, the training will be executed in the CPU')
            args.gpu = -1

    logging.info('Minibatch-size: {}'.format(args.batch))
    logging.info('# epoch: {}'.format(args.epoch))

    # dataset classes are resolved by name from the project's dataset module
    DBClass = importlib.import_module('inlib.dataset_hdf5')
    try:
        trainset = getattr(DBClass, args.dataset)(args.folder, args.sequence,
                                                  'train', args.init_step)
    except Exception as e:
        # training data is mandatory: abort with the original cause attached
        logging.warning(
            'Cannot continue with the training, Failing Loading Data... ')
        raise TypeError(e)
    try:
        testset = getattr(DBClass, args.dataset)(args.folder, args.sequence,
                                                 'test', args.init_step)
    except Exception as e:
        # test data is optional: evaluation is simply skipped below
        logging.warning('Cannot find testing files, test stage will be skipped... ')
        testset = None

    def make_optimizer(net, alpha=0.0002, beta1=0.5):
        """Adam (amsgrad) with gradient-clipping and gradient-noise hooks."""
        optimizer = optimizers.Adam(alpha=alpha, beta1=beta1, amsgrad=True)
        optimizer.setup(net)
        logging.info('Adding Gradient Clipping Hook')
        optimizer.add_hook(GradientClipping(10.), 'hook_clip')
        logging.info('Adding Gradient Noise Hook')
        optimizer.add_hook(GradientNoise(0.01), 'hook_noise')
        return optimizer

    optimizer = make_optimizer(model)
    train_iter = iterators.MultiprocessIterator(trainset, batch_size=args.batch,
                                                shuffle=True, n_processes=args.workers,
                                                n_prefetch=args.workers)
    if testset is not None:
        test_iter = iterators.SerialIterator(testset, batch_size=args.batch,
                                             repeat=False, shuffle=False)

    # TODO(nelson): Change later the steps
    updater = BPTTUpdater(train_iter, optimizer, None, args.gpu, converter=convert)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.save)
    trainer.extend(extensions.dump_graph('main/loss'))
    # -1 means "snapshot only once, at the end"
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))
    trainer.extend(extensions.LogReport())
    if testset is not None:
        trainer.extend(
            extensions.Evaluator(test_iter, model, device=args.gpu,
                                 converter=convert))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss'], 'epoch', file_name='loss.png'))
    trainer.extend(
        extensions.PrintReport(['epoch', 'main/loss', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())
    trainer.extend(extensions.observe_lr())
    # trainer.extend(CommandsExtension())
    save_args(args, args.save)
    trainer.run()

    if not os.path.exists(args.save):
        os.makedirs(args.save)
    serializers.save_hdf5('{}/trained_{}.model'.format(args.save, args.epoch), model)
def main():
    """Train a Mask R-CNN keypoint model on COCO or an RGB-D dataset.

    Parses CLI options, loads the training data, builds the model/optimizer,
    wires a chainer Trainer with snapshot/LR-shift/logging extensions, persists
    the run arguments via chainerui, and runs training.
    """
    parser = argparse.ArgumentParser(description='Mask R-CNN')
    parser.add_argument('--gpu', '-g', type=int, default=0)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result', help='Output directory')
    parser.add_argument('--iteration', '-i', type=int, default=200000)
    parser.add_argument('--weight', '-w', type=str, default='')
    parser.add_argument('--label_file', '-f', type=str, default='data/label_coco.txt')
    parser.add_argument('--backbone', type=str, default='fpn')
    parser.add_argument('--head_arch', '-a', type=str, default='fpn_keypoint')
    parser.add_argument('--multi_gpu', '-m', type=int, default=0)
    parser.add_argument('--batch_size', '-b', type=int, default=1)
    parser.add_argument('--dataset', default='coco', choices=['coco', 'depth'])
    args = parser.parse_args()

    print('lr:{}'.format(args.lr))
    print('output:{}'.format(args.out))
    print('weight:{}'.format(args.weight))
    print('label file:{}'.format(args.label_file))
    print('iteration::{}'.format(args.iteration))
    print('backbone architecture:{}'.format(args.backbone))
    print('head architecture:{}'.format(args.head_arch))

    if args.dataset == 'coco':
        train_data = load_dataset(COCOKeypointsLoader, 'train_data_kp.pkl')
    elif args.dataset == 'depth':
        train_data = load_dataset(
            lambda: DepthDataset(path='data/rgbd/train.txt', root='data/rgbd/'),
            'train_data_depth_kp.pkl')
    n_keypoints = train_data.n_keypoints
    print(f'number of keypoints={n_keypoints}')

    if args.multi_gpu:
        print('try to use chainer.training.updaters.MultiprocessParallelUpdater')
        if not chainer.training.updaters.MultiprocessParallelUpdater.available():
            print('MultiprocessParallelUpdater is not available')
            args.multi_gpu = 0

    faster_rcnn = MaskRCNNResnet50(n_fg_class=1,
                                   backbone=args.backbone,
                                   head_arch=args.head_arch,
                                   n_keypoints=n_keypoints)
    faster_rcnn.use_preset('evaluate')
    model = FPNMaskRCNNTrainChain(
        faster_rcnn,
        mask_loss_fun=lambda x, y, z, w: calc_mask_loss(
            x, y, z, w, num_keypoints=n_keypoints),
        binary_mask=False)
    # optional warm start; strict=False tolerates head mismatches
    if exists(args.weight):
        chainer.serializers.load_npz(args.weight, model.faster_rcnn, strict=False)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Transform references the Faster-RCNN's `prepare`, so the initialization
    # order here is unfortunately intertwined.
    train_data = TransformDataset(train_data, Transform(faster_rcnn))

    if args.multi_gpu:
        train_iters = [
            chainer.iterators.SerialIterator(train_data, 1, repeat=True, shuffle=True)
            for i in range(8)
        ]
        # BUG FIX: the class lives in chainer.training.updaters (plural) — the
        # singular path used before has no MultiprocessParallelUpdater — and its
        # constructor keyword is `devices=`, not `device=`.
        updater = chainer.training.updaters.MultiprocessParallelUpdater(
            train_iters, optimizer, devices=range(8))
    else:
        train_iter = chainer.iterators.SerialIterator(train_data,
                                                      batch_size=args.batch_size,
                                                      repeat=True,
                                                      shuffle=False)
        # Consistency fix: use the canonical chainer.training.updaters path.
        updater = chainer.training.updaters.StandardUpdater(train_iter,
                                                            optimizer,
                                                            device=args.gpu)

    trainer = chainer.training.Trainer(updater, (args.iteration, 'iteration'),
                                       args.out)
    trainer.extend(extensions.snapshot_object(
        model.faster_rcnn, 'model_{.updater.iteration}.npz'),
        trigger=(20000, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1), trigger=(1, 'epoch'))
    log_interval = 100, 'iteration'
    trainer.extend(chainer.training.extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration',
        'epoch',
        'elapsed_time',
        'lr',
        'main/loss',
        'main/mask_loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
    ]), trigger=(100, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=200))
    trainer.extend(extensions.dump_graph('main/loss'))
    save_args(args, args.out)
    trainer.extend(CommandsExtension(), trigger=(100, 'iteration'))
    trainer.run()
def main():
    """Training driver for the conditional-GAN image-translation model.

    Builds paired image datasets, encoder/decoder (+ optional discriminator),
    per-model optimizers, and a Chainer trainer with snapshot/log/plot and
    visualisation extensions, then runs the training loop.
    """
    args = arguments()
    # Each run gets a timestamped output directory, e.g. <out>/0412_1530_cgan.
    outdir = os.path.join(args.out, dt.now().strftime('%m%d_%H%M') + "_cgan")

    # chainer.config.type_check = False
    chainer.config.autotune = True
    chainer.config.dtype = dtypes[args.dtype]
    chainer.print_runtime_info()
    #print('Chainer version: ', chainer.__version__)
    #print('GPU availability:', chainer.cuda.available)
    #print('cuDNN availability:', chainer.cuda.cudnn_enabled)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()

    ## dataset preparation
    train_d = Dataset(args.train,
                      args.root,
                      args.from_col,
                      args.to_col,
                      clipA=args.clipA,
                      clipB=args.clipB,
                      class_num=args.class_num,
                      crop=(args.crop_height, args.crop_width),
                      imgtype=args.imgtype,
                      random=args.random_translate,
                      grey=args.grey,
                      BtoA=args.btoa)
    test_d = Dataset(args.val,
                     args.root,
                     args.from_col,
                     args.to_col,
                     clipA=args.clipA,
                     clipB=args.clipB,
                     class_num=args.class_num,
                     crop=(args.crop_height, args.crop_width),
                     imgtype=args.imgtype,
                     random=args.random_translate,
                     grey=args.grey,
                     BtoA=args.btoa)
    # The dataset may adjust the crop size; propagate it back onto args.
    args.crop_height, args.crop_width = train_d.crop
    if (len(train_d) == 0):
        print("No images found!")
        exit()

    # setup training/validation data iterators
    train_iter = chainer.iterators.SerialIterator(train_d, args.batch_size)
    test_iter = chainer.iterators.SerialIterator(test_d,
                                                 args.nvis,
                                                 shuffle=False)
    test_iter_gt = chainer.iterators.SerialIterator(
        train_d, args.nvis,
        shuffle=False)  ## same as training data; used for validation
    # Infer input/output channel counts from the first example pair.
    args.ch = len(train_d[0][0])
    args.out_ch = len(train_d[0][1])
    print("Input channels {}, Output channels {}".format(args.ch, args.out_ch))
    if (len(train_d) * len(test_d) == 0):
        print("No images found!")
        exit()

    ## Set up models
    # shared pretrained layer
    if (args.gen_pretrained_encoder and args.gen_pretrained_lr_ratio == 0):
        if "resnet" in args.gen_pretrained_encoder:
            pretrained = L.ResNet50Layers()
            print("Pretrained ResNet model loaded.")
        else:
            pretrained = L.VGG16Layers()
            print("Pretrained VGG model loaded.")
        if args.gpu >= 0:
            pretrained.to_gpu()
        enc_x = net.Encoder(args, pretrained)
    else:
        enc_x = net.Encoder(args)
    # gen = net.Generator(args)
    dec_y = net.Decoder(args)
    if args.lambda_dis > 0:
        dis = net.Discriminator(args)
        models = {'enc_x': enc_x, 'dec_y': dec_y, 'dis': dis}
    else:
        # Dummy link so the unconditional to_gpu()/optimizer calls below work.
        dis = L.Linear(1, 1)
        models = {'enc_x': enc_x, 'dec_y': dec_y}

    ## load learnt models
    optimiser_files = []
    if args.model_gen:
        serializers.load_npz(args.model_gen, enc_x)
        serializers.load_npz(args.model_gen.replace('enc_x', 'dec_y'), dec_y)
        print('model loaded: {}, {}'.format(
            args.model_gen, args.model_gen.replace('enc_x', 'dec_y')))
        # Matching optimiser state files are derived from the model file name.
        optimiser_files.append(args.model_gen.replace('enc_x', 'opt_enc_x'))
        optimiser_files.append(args.model_gen.replace('enc_x', 'opt_dec_y'))
    if args.model_dis:
        serializers.load_npz(args.model_dis, dis)
        print('model loaded: {}'.format(args.model_dis))
        optimiser_files.append(args.model_dis.replace('dis', 'opt_dis'))

    ## send models to GPU
    if args.gpu >= 0:
        enc_x.to_gpu()
        dec_y.to_gpu()
        dis.to_gpu()

    # Setup optimisers
    def make_optimizer(model, lr, opttype='Adam', pretrained_lr_ratio=1.0):
        # eps = 1e-5 if args.dtype==np.float16 else 1e-8
        optimizer = optim[opttype](lr)
        optimizer.setup(model)
        if args.weight_decay > 0:
            # Adam-family optimisers take decoupled weight decay directly;
            # others get an L2 (or Lasso) gradient hook.
            if opttype in ['Adam', 'AdaBound', 'Eve']:
                optimizer.weight_decay_rate = args.weight_decay
            else:
                if args.weight_decay_norm == 'l2':
                    optimizer.add_hook(
                        chainer.optimizer.WeightDecay(args.weight_decay))
                else:
                    optimizer.add_hook(
                        chainer.optimizer_hooks.Lasso(args.weight_decay))
        return optimizer

    opt_enc_x = make_optimizer(enc_x, args.learning_rate_gen, args.optimizer)
    opt_dec_y = make_optimizer(dec_y, args.learning_rate_gen, args.optimizer)
    opt_dis = make_optimizer(dis, args.learning_rate_dis, args.optimizer)
    optimizers = {'enc_x': opt_enc_x, 'dec_y': opt_dec_y, 'dis': opt_dis}

    ## resume optimisers from file
    # NOTE(review): zipping a list against a dict relies on dict insertion
    # order matching the order optimiser_files was built in, and the bare
    # `except` swallows every error -- confirm this best-effort resume is
    # intended.
    if args.load_optimizer:
        for (m, e) in zip(optimiser_files, optimizers):
            if m:
                try:
                    serializers.load_npz(m, optimizers[e])
                    print('optimiser loaded: {}'.format(m))
                except:
                    print("couldn't load {}".format(m))
                    pass

    # finetuning
    if args.gen_pretrained_encoder:
        if args.gen_pretrained_lr_ratio == 0:
            # Freeze the shared pretrained feature extractor entirely.
            enc_x.base.disable_update()
        else:
            # Scale down the learning rate of the pretrained layers only.
            for func_name in enc_x.encoder.base._children:
                for param in enc_x.encoder.base[func_name].params():
                    param.update_rule.hyperparam.eta *= args.gen_pretrained_lr_ratio

    # Set up trainer
    updater = Updater(
        models=(enc_x, dec_y, dis),
        iterator={'main': train_iter},
        optimizer=optimizers,
        # converter=convert.ConcatWithAsyncTransfer(),
        params={'args': args},
        device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)

    ## save learnt results at a specified interval or at the end of training
    if args.snapinterval < 0:
        args.snapinterval = args.epoch
    snapshot_interval = (args.snapinterval, 'epoch')
    display_interval = (args.display_interval, 'iteration')

    for e in models:
        trainer.extend(extensions.snapshot_object(models[e],
                                                  e + '{.updater.epoch}.npz'),
                       trigger=snapshot_interval)
        if args.parameter_statistics:
            trainer.extend(extensions.ParameterStatistics(
                models[e]))  ## very slow
    for e in optimizers:
        trainer.extend(extensions.snapshot_object(
            optimizers[e], 'opt_' + e + '{.updater.epoch}.npz'),
            trigger=snapshot_interval)

    ## plot NN graph
    if args.lambda_rec_l1 > 0:
        trainer.extend(
            extensions.dump_graph('dec_y/loss_L1', out_name='enc.dot'))
    elif args.lambda_rec_l2 > 0:
        trainer.extend(
            extensions.dump_graph('dec_y/loss_L2', out_name='gen.dot'))
    elif args.lambda_rec_ce > 0:
        trainer.extend(
            extensions.dump_graph('dec_y/loss_CE', out_name='gen.dot'))
    if args.lambda_dis > 0:
        trainer.extend(
            extensions.dump_graph('dis/loss_real', out_name='dis.dot'))

    ## log outputs
    # Which report keys to show depends on which loss terms are enabled.
    log_keys = ['epoch', 'iteration', 'lr']
    log_keys_gen = ['myval/loss_L1', 'myval/loss_L2']
    log_keys_dis = []
    if args.lambda_rec_l1 > 0:
        log_keys_gen.append('dec_y/loss_L1')
    if args.lambda_rec_l2 > 0:
        log_keys_gen.append('dec_y/loss_L2')
    if args.lambda_rec_ce > 0:
        log_keys_gen.extend(['dec_y/loss_CE', 'myval/loss_CE'])
    if args.lambda_reg > 0:
        log_keys.extend(['enc_x/loss_reg'])
    if args.lambda_tv > 0:
        log_keys_gen.append('dec_y/loss_tv')
    if args.lambda_dis > 0:
        log_keys_dis.extend(
            ['dec_y/loss_dis', 'dis/loss_real', 'dis/loss_fake'])
    if args.lambda_mispair > 0:
        log_keys_dis.append('dis/loss_mispair')
    if args.dis_wgan:
        log_keys_dis.extend(['dis/loss_gp'])
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(extensions.PrintReport(log_keys + log_keys_gen +
                                          log_keys_dis),
                   trigger=display_interval)
    if extensions.PlotReport.available():
        # trainer.extend(extensions.PlotReport(['lr'], 'iteration',trigger=display_interval, file_name='lr.png'))
        trainer.extend(
            extensions.PlotReport(log_keys_gen,
                                  'iteration',
                                  trigger=display_interval,
                                  file_name='loss_gen.png',
                                  postprocess=plot_log))
        trainer.extend(
            extensions.PlotReport(log_keys_dis,
                                  'iteration',
                                  trigger=display_interval,
                                  file_name='loss_dis.png'))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # learning rate scheduling
    trainer.extend(extensions.observe_lr(optimizer_name='enc_x'),
                   trigger=display_interval)
    # Adam-family optimisers scale via 'eta'; the rest expose 'lr' directly.
    if args.optimizer in ['Adam', 'AdaBound', 'Eve']:
        lr_target = 'eta'
    else:
        lr_target = 'lr'
    # NOTE(review): the else-branch divides by args.lr_drop, which is <= 0
    # whenever that branch is taken, so it raises ZeroDivisionError for
    # --lr_drop 0 -- confirm the intended values of --lr_drop.
    if args.lr_drop > 0:  ## cosine annealing
        for e in [opt_enc_x, opt_dec_y, opt_dis]:
            trainer.extend(CosineShift(lr_target,
                                       args.epoch // args.lr_drop,
                                       optimizer=e),
                           trigger=(1, 'epoch'))
    else:
        for e in [opt_enc_x, opt_dec_y, opt_dis]:
            #trainer.extend(extensions.LinearShift('eta', (1.0,0.0), (decay_start_iter,decay_end_iter), optimizer=e))
            trainer.extend(extensions.ExponentialShift('lr',
                                                       0.33,
                                                       optimizer=e),
                           trigger=(args.epoch // args.lr_drop, 'epoch'))

    # evaluation
    vis_folder = os.path.join(outdir, "vis")
    os.makedirs(vis_folder, exist_ok=True)
    if not args.vis_freq:
        # Default: visualise roughly twice per epoch, but at least every 50 it.
        args.vis_freq = max(len(train_d) // 2, 50)
    trainer.extend(VisEvaluator({
        "test": test_iter,
        "train": test_iter_gt
    }, {
        "enc_x": enc_x,
        "dec_y": dec_y
    },
        params={
            'vis_out': vis_folder,
            'args': args
        },
        device=args.gpu),
        trigger=(args.vis_freq, 'iteration'))

    # ChainerUI: removed until ChainerUI updates to be compatible with Chainer 6.0
    trainer.extend(CommandsExtension())

    # Run the training
    print("\nresults are saved under: ", outdir)
    save_args(args, outdir)
    trainer.run()
def main():
    """CLI entry point for uncertainty-aware segmentation.

    Parses options, builds the preprocessing pipeline (normalizer/augmentor)
    and a Bayesian U-Net predictor, then dispatches to the training phase or
    one of the testing phases depending on the flags.
    """
    parser = argparse.ArgumentParser(
        description='Example: Uncertainty estimates in segmentation',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--data_root', '-d', type=str,
                        default='./preprocessed',
                        help='Directory to dataset')
    parser.add_argument('--batchsize', '-b', type=int, default=2,
                        help='Number of images in each mini-batch')
    parser.add_argument('--iteration', '-i', type=int, default=50000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, nargs='+', default=[0],
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='logs',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--valid_augment', action='store_true',
                        help='Enable data augmentation during validation')
    parser.add_argument('--valid_split_ratio', type=float, default=0.1,
                        help='Ratio of validation data to training data')
    parser.add_argument(
        '--valid_split_type', type=str, default='slice',
        choices=['slice', 'patient'],
        help='How to choice validation data from training data')
    parser.add_argument('--lr', type=float, default=1e-4,
                        help='Learning rate')
    parser.add_argument('--decay', type=float, default=-1,
                        help='Weight of L2 regularization')
    parser.add_argument('--mc_iteration', type=int, default=15,
                        help='Number of iteration of MCMC')
    parser.add_argument('--pinfall', type=int, default=-1,
                        help='Countdown for early stopping of training.')
    parser.add_argument(
        '--freeze_upconv',
        action='store_true',
        help=
        'Disables updating the up-convolutional weights. If weights are initialized with \
bilinear kernels, up-conv acts as bilinear upsampler.'
    )
    parser.add_argument('--test_on_test', action='store_true',
                        help='Switch to the testing phase on test dataset')
    parser.add_argument('--test_on_valid', action='store_true',
                        help='Switch to the testing phase on valid dataset')
    parser.add_argument('--seed', type=int, default=0,
                        help='Fix the random seed')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('')

    # setup output directory
    os.makedirs(args.out, exist_ok=True)  # NOTE: ad-hoc

    # setup a normalizer
    # Pipeline: clip to min/max range, shift by 0.5, then divide by 1/255.
    normalizer = Normalizer()
    normalizer.add(Clip2D('minmax'))
    normalizer.add(Subtract2D(0.5))
    normalizer.add(Divide2D(1. / 255.))

    # setup an augmentor
    augmentor = DataAugmentor(n_dim=2)
    augmentor.add(Flip2D(axis=2))
    augmentor.add(
        Affine2D(rotation=15.,
                 translate=(10., 10.),
                 shear=0.25,
                 zoom=(0.8, 1.2),
                 keep_aspect_ratio=True,
                 fill_mode=('reflect', 'reflect'),
                 cval=(0., 0.),
                 # interpolation order 1 (linear) for images, 0 (nearest)
                 # for the label channel
                 interp_order=(1, 0)))

    # All model construction and dataset splitting happens under a fixed
    # random seed for reproducibility.
    with fixed_seed(args.seed, strict=False):
        # setup a predictor
        conv_param = {  # NOTE: you can change layer type if you want..
            'name': 'dilated',
            'ksize': 3,
            'stride': 1,
            'pad': 2,
            'dilate': 2,
            'initialW': {'name': 'he_normal', 'scale': 1.0},
            'initial_bias': {'name': 'zero'},
        }

        upconv_param = {  # NOTE: you can change layer type if you want..
            'name': 'deconv',
            'ksize': 3,
            'stride': 2,
            'pad': 0,
            'initialW': {'name': 'bilinear', 'scale': 1.0},
            'initial_bias': {'name': 'zero'},
        }

        norm_param = {'name': 'batch'}

        predictor = BayesianUNet(ndim=2,
                                 out_channels=2,
                                 nlayer=4,
                                 nfilter=32,
                                 conv_param=conv_param,
                                 upconv_param=upconv_param,
                                 norm_param=norm_param)

        if args.freeze_upconv:
            # With bilinear-initialized weights, frozen up-convolutions act
            # as a fixed bilinear upsampler (see --freeze_upconv help).
            predictor.freeze_layers(name='upconv',
                                    recursive=True,
                                    verbose=True)

        # setup dataset
        train, valid, test = get_dataset(args.data_root,
                                         args.valid_split_type,
                                         args.valid_split_ratio,
                                         args.valid_augment, normalizer,
                                         augmentor)

        # run
        if args.test_on_test:
            test_phase(predictor, test, args)
        elif args.test_on_valid:
            test_phase(predictor, valid, args)
        else:
            # Persist the experimental conditions alongside the results.
            save_args(args, args.out)
            predictor.save_args(os.path.join(args.out, 'model.json'))
            normalizer.summary(os.path.join(args.out, 'norm.json'))
            augmentor.summary(os.path.join(args.out, 'augment.json'))

            train_phase(predictor, train, valid, args)
'gen_up', 'gen_ksize', 'unet', 'skipdim', 'latent_dim', 'gen_fc', 'gen_fc_activation', 'spconv', 'eqconv', 'senet', 'dtype' ]: if x in larg: setattr(args, x, larg[x]) for x in ['imgtype', 'crop_width', 'crop_height']: if not getattr(args, x): setattr(args, x, larg[x]) if not args.load_models: if larg["epoch"]: args.load_models = os.path.join( root, 'enc_x{}.npz'.format(larg["epoch"])) args.random_translate = 0 save_args(args, outdir) print(args) # Enable autotuner of cuDNN chainer.config.autotune = True chainer.config.dtype = dtypes[args.dtype] ## load images if args.imgtype == "dcm": from dataset_dicom import Dataset as Dataset args.grey = True else: from dataset_jpg import DatasetOutMem as Dataset ## compatibility if not hasattr(args, 'out_ch'): args.out_ch = 1 if args.grey else 3
def main():
    """Distributed ImageNet training entry point (EfficientNet / SE-ResNeXt).

    Uses ChainerMN for data-parallel training: worker 0 loads and scatters
    the dataset, a multi-node optimizer averages gradients, and only rank 0
    attaches the reporting extensions.
    """
    # Check if GPU is available
    # (ImageNet example does not support CPU execution)
    if not chainer.cuda.available:
        raise RuntimeError('ImageNet requires GPU support.')

    archs = [f'b{i}' for i in range(8)] + ['se']
    # Architecture-wise default input resolutions.
    patchsizes = {
        'b0': 224,
        'b1': 240,
        'b2': 260,
        'b3': 300,
        'b4': 380,
        'b5': 456,
        'b6': 528,
        'b7': 600,
        'se': 224
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('--arch', '-a', choices=archs, default='b0')
    parser.add_argument('--patchsize',
                        default=None,
                        type=int,
                        help='The input size of images. If not specifed,\
 architecture-wise default values wil be used.'
                        )
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--optimizer', default='RMSProp')
    parser.add_argument('--lr', default=0.256, type=float)
    parser.add_argument('--cosine_annealing', action='store_true')
    parser.add_argument('--exponent', type=float, default=0.97)
    parser.add_argument('--exponent_trigger', type=float, default=2.6)
    parser.add_argument('--soft_label', action='store_true')
    parser.add_argument('--epoch', '-E', type=int, default=350,
                        help='Number of epochs to train')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int, default=3,
                        help='Number of parallel data loading processes')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='../ssd/imagenet',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=32,
                        help='Validation minibatch size')
    parser.add_argument('--workerwisebn', action='store_true')
    parser.add_argument('--no_dropconnect', action='store_true')
    parser.add_argument('--test', action='store_true')
    parser.add_argument('--communicator', default='pure_nccl')
    parser.add_argument('--no_autoaugment', action='store_true')
    parser.add_argument('--dtype', default='float32',
                        choices=['mixed16', 'float32'],
                        help='For now do not use mixed16')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    chainer.global_config.dtype = args.dtype
    comm = chainermn.create_communicator(args.communicator)
    # Each process drives the GPU matching its rank within the node.
    device = comm.intra_rank

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        print('Using {} communicator'.format(args.communicator))
        print('Using {} arch'.format(args.arch))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        mode = 'workerwise' if args.workerwisebn else 'synchronized'
        print(f'BatchNorm is {mode}')
        print('==========================================')

    # Soft labels require matching soft loss/accuracy functions.
    if args.soft_label:
        accfun = soft_accuracy
        lossfun = soft_softmax_cross_entropy
    else:
        accfun = F.accuracy
        lossfun = F.softmax_cross_entropy

    if args.arch != 'se':
        model = EfficientNet(args.arch,
                             workerwisebn=args.workerwisebn,
                             no_dropconnect=args.no_dropconnect)
    else:
        model = SEResNeXt50()
    model = L.Classifier(model, lossfun=lossfun, accfun=accfun)
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    chainer.cuda.get_device_from_id(device).use()  # Make the GPU current
    model.to_gpu()

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    patchsize = patchsizes[
        args.arch] if args.patchsize is None else args.patchsize
    patchsize = (patchsize, patchsize)
    train_transform, val_transform, _ = get_transforms(
        patchsize, no_autoaugment=args.no_autoaugment, soft=args.soft_label)
    if comm.rank == 0:
        train = ImageNetDataset(args.root, 'train')
        val = ImageNetDataset(args.root, 'val')
    else:
        train = None
        val = None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    val = chainermn.scatter_dataset(val, comm)
    train = chainer.datasets.TransformDataset(train, train_transform)
    val = chainer.datasets.TransformDataset(val, val_transform)

    # A workaround for processes crash should be done before making
    # communicator above, when using fork (e.g. MultiProcessIterator)
    # along with Infiniband.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Create a multi node optimizer from a standard Chainer optimizer.
    # `symbol` names the hyperparameter that the LR schedules manipulate.
    symbol = 'lr'
    if args.optimizer.lower() == 'rmsprop':
        optimizer = chainer.optimizers.RMSprop(lr=args.lr, alpha=0.9)
    elif args.optimizer.lower() == 'momentumsgd':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.lr)
    elif args.optimizer.lower() == 'corrected':
        optimizer = chainer.optimizers.CorrectedMomentumSGD(lr=args.lr)
    elif args.optimizer.lower() == 'adabound':
        optimizer = chainer.optimizers.AdaBound(alpha=args.lr, final_lr=0.5)
        symbol = 'alpha'  # AdaBound's step size is called 'alpha', not 'lr'
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(1e-5))

    # NOTE(review): there is no path separator between the arch directory and
    # args.out ('experiments/b0' + 'result') -- confirm this naming is wanted.
    args.out = f'experiments/{args.arch}' + args.out
    save_args(args, args.out)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # In --test mode everything fires every few iterations for a quick check.
    checkpoint_interval = (10, 'iteration') if args.test else (1, 'epoch')
    val_interval = (10, 'iteration') if args.test else (2, 'epoch')
    log_interval = (10, 'iteration') if args.test else (2, 'epoch')

    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    # NOTE(review): these comparisons are case-sensitive while the optimizer
    # selection above is case-insensitive -- e.g. '--optimizer momentumsgd'
    # with --cosine_annealing would attach no schedule at all. Confirm.
    if args.cosine_annealing:
        schedule = lr_schedules.CosineLRSchedule(args.lr)
        if args.optimizer in ['MomentumSGD', 'Corrected']:
            trainer.extend(lr_schedules.LearningRateScheduler(schedule))
    else:
        trainer.extend(extensions.ExponentialShift(symbol, args.exponent),
                       trigger=(args.exponent_trigger, 'epoch'))

    # Create a multi node evaluator from an evaluator.
    evaluator = TestModeEvaluator(val_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.DumpGraph('main/loss'))
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}.npz'),
            trigger=val_interval)
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'lr'
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=100))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
params = configs.export() params['num_word_vocab'] = num_word_vocab params['num_char_vocab'] = num_char_vocab params['num_tag_vocab'] = num_tag_vocab epoch = configs['iteration']['epoch'] trigger = (epoch, 'epoch') model_path = configs['output'] timestamp = datetime.datetime.now() timestamp_str = timestamp.isoformat() output_path = Path(f'{model_path}.{timestamp_str}') trainer = T.Trainer(updater, trigger, out=output_path) save_args(params, output_path) msg = f'Create \x1b[31m{output_path}\x1b[0m for saving model snapshots' logging.debug(msg) entries = ['epoch', 'iteration', 'elapsed_time', 'lr', 'main/loss'] entries += ['validation/main/loss', 'validation/main/fscore'] entries += ['validation_1/main/loss', 'validation_1/main/fscore'] valid_evaluator = NamedEntityEvaluator(valid_iterator, model, transformer.itransform, converter, device=args.device) test_evaluator = NamedEntityEvaluator(test_iterator, model,
def main():
    """Train a pix2pix model that upscales 4-channel images.

    Parses CLI options, builds the generator/discriminator pair with Adam
    optimizers, prepares automatically generated upscaling training data,
    and runs the Chainer trainer with snapshot/log/preview extensions.
    """
    parser = argparse.ArgumentParser(
        description='chainer implementation of model', )
    parser.add_argument(
        '--batchsize',
        '-b',
        type=int,
        default=1,
        help='Number of images in each mini-batch',
    )
    parser.add_argument(
        '--epoch',
        '-e',
        type=int,
        default=200,
        help='Number of sweeps over the dataset to train',
    )
    parser.add_argument(
        '--base_ch',
        type=int,
        default=64,
        help='base channel size of hidden layer',
    )
    parser.add_argument(
        '--gpu',
        '-g',
        type=int,
        default=-1,
        help='GPU ID (negative value indicates CPU)',
    )
    parser.add_argument(
        '--dataset',
        '-i',
        default='./image/fsm',
        help='Directory of image files.',
    )
    parser.add_argument(
        '--out',
        '-o',
        default='result',
        help='Directory to output the result',
    )
    parser.add_argument(
        '--resume',
        '-r',
        default='',
        help='Resume the training from snapshot',
    )
    parser.add_argument(
        '--snapshot_interval',
        type=int,
        default=1000,
        help='Interval of snapshot',
    )
    parser.add_argument(
        '--display_interval',
        type=int,
        default=10,
        help='Interval of displaying log to console',
    )
    parser.add_argument(
        '--preview_interval',
        type=int,
        default=100,
        help='Interval of previewing generated image',
    )
    parser.add_argument(
        '--use_random_nn_downscale',
        action='store_true',
        default=False,
        help='downscal by sampling 4-nearest pixel randomly',
    )
    parser.add_argument(
        '--flat_discriminator',
        action='store_true',
        default=False,
        help='(deprecated)',
    )
    parser.add_argument(
        '--composite',
        action='store_true',
        default=False,
        help='composite',
    )
    args = parser.parse_args()
    # ChainerUI: record the experimental conditions in the output directory.
    save_args(args, args.out)

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # 4-channel in/out (presumably RGBA) -- TODO confirm against the dataset.
    model = Pix2Pix(in_ch=4, out_ch=4, base_ch=args.base_ch,
                    flat=args.flat_discriminator)
    gen = model.gen
    dis = model.dis
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    def make_optimizer(model, alpha=0.0002, beta1=0.5):
        optimizer = chainer.optimizers.Adam(alpha=alpha, beta1=beta1)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(0.00001), 'hook_dec')
        return optimizer

    opt_gen = make_optimizer(gen)
    opt_dis = make_optimizer(dis)

    print('# upscale learning with automatically generated images')
    if args.composite:
        train_d = CompositeAutoUpscaleDataset(args.dataset, )
        test_d = CompositeAutoUpscaleDataset(args.dataset, )
    else:
        train_d = AutoUpscaleDataset(
            "{}/main".format(args.dataset),
            random_nn=args.use_random_nn_downscale,
        )
        # Evaluation always uses the deterministic downscale.
        test_d = AutoUpscaleDataset(
            "{}/main".format(args.dataset),
            random_nn=False,
        )
    train_iter = chainer.iterators.SerialIterator(train_d, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test_d, args.batchsize)

    # Set up a trainer
    updater = Pix2PixUpdater(
        model=model,
        iterator={
            'main': train_iter,
        },
        optimizer={
            'gen': opt_gen,
            'dis': opt_dis,
        },
        device=args.gpu,
    )
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    snapshot_interval = (args.snapshot_interval, 'iteration')
    display_interval = (args.display_interval, 'iteration')
    preview_interval = (args.preview_interval, 'iteration')
    trainer.extend(
        extensions.snapshot(filename='snapshot_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval,
    )
    trainer.extend(
        extensions.snapshot_object(model.gen,
                                   'gen_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval,
    )
    trainer.extend(
        extensions.snapshot_object(model.dis,
                                   'dis_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval,
    )
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(
        extensions.PlotReport(
            [
                'gen/loss_adv',
                'gen/loss_rec',
                'gen/loss',
                'dis/loss',
            ],
            trigger=display_interval,
        ))
    trainer.extend(extensions.PrintReport([
        'epoch',
        'iteration',
        'gen/loss_adv',
        'gen/loss_rec',
        'gen/loss',
        'dis/loss',
    ]), trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Preview: run the current generator over the test images periodically.
    upscaler = Upscaler(ChainerConverter(gen, 64), batch_size=args.batchsize)
    trainer.extend(out_image(test_iter, gen, 10, args.out),
                   trigger=display_interval)
    trainer.extend(full_out_image(upscaler, "{}/test".format(args.dataset),
                                  args.out),
                   trigger=preview_interval)
    trainer.extend(CommandsExtension())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    '''
    Entry point: train the AEFINet frame-interpolation model on UCF101.

    Parses CLI options, builds the model and optimizer, runs two-GPU
    data-parallel training with logging/plotting extensions, and finally
    saves the trained model.
    '''
    # command-line arguments
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.001,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu0', '-g', type=int, default=0,
                        help='GPU1 ID (negative value indicates CPU)')
    parser.add_argument('--gpu1', '-G', type=int, default=2,
                        help='GPU2 ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--iter_parallel', '-p', action='store_true',
                        default=False, help='loading dataset from disk')
    parser.add_argument('--opt', '-o', type=str, choices=('adam', 'sgd'),
                        default='adam')
    parser.add_argument('--fsize', '-f', type=int, default=5)
    parser.add_argument('--ch', '-c', type=int, default=4)
    args = parser.parse_args()

    # echo the training configuration
    print("-=Learning Parameter=-")
    print("# Max Epochs: {}".format(args.epoch))
    print("# Batch Size: {}".format(args.batchsize))
    print("# Learning Rate: {}".format(args.learnrate))
    print("# Optimizer Method: {}".format(args.opt))
    print("# Filter Size: {}".format(args.fsize))
    print("# Channel Scale: {}".format(args.ch))
    print('# Train Dataet: General 100')
    if args.iter_parallel:
        print("# Data Iters that loads in Parallel")
    print("\n")

    # output directory
    # make result dir
    network_name = 'AEFINetConcat'
    model_name = 'AEFINet_Test_opt_{}_ch_{}_fsize_{}'.format(
        args.opt, args.ch, args.fsize)
    outdir = path.join(ROOT_PATH, 'results', 'FI', 'AEFINet', model_name)
    util.make_result_dir(args, outdir)

    #loading dataset
    # Parallel iterators stream from disk; otherwise everything is in memory.
    if args.iter_parallel:
        train = datasets.SequenceDataset(
            dataset='UCF101_train_size64_frame3_group10_max100_p')
        test = datasets.SequenceDataset(
            dataset='UCF101_test_size64_frame3_group25_max5_p')
    else:
        train = datasets.SequenceDatasetOnMem(
            dataset='UCF101_train_size64_frame3_group10_max100_p')
        test = datasets.SequenceDatasetOnMem(
            dataset='UCF101_test_size64_frame3_group25_max5_p')

    # prepare model
    chainer.cuda.get_device_from_id(args.gpu0).use()
    model = N.GenEvaluator(N.AEFINetConcat(f_size=args.fsize, ch=args.ch))

    # setup optimizer
    if args.opt == 'adam':
        optimizer = chainer.optimizers.Adam(alpha=args.learnrate)
    elif args.opt == 'sgd':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate,
                                                   momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    # setup iter
    if args.iter_parallel:
        train_iter = chainer.iterators.MultiprocessIterator(train,
                                                            args.batchsize,
                                                            n_processes=8)
        test_iter = chainer.iterators.MultiprocessIterator(test,
                                                           args.batchsize,
                                                           repeat=False,
                                                           shuffle=False,
                                                           n_processes=8)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(test,
                                                     args.batchsize,
                                                     repeat=False,
                                                     shuffle=False)

    # setup trainer
    # Two-GPU data-parallel training: gpu0 is 'main', gpu1 is 'second'.
    updater = training.ParallelUpdater(
        train_iter,
        optimizer,
        devices={
            'main': args.gpu0,
            'second': args.gpu1
        },
    )
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)

    # # eval test data
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu0))
    # dump loss graph
    trainer.extend(extensions.dump_graph('main/loss'))
    # lr shift
    # NOTE(review): with the default --epoch of 100 these triggers fire only
    # at the very end of training -- confirm the decay schedule is intended.
    if args.opt == 'sgd':
        trainer.extend(extensions.ExponentialShift("lr", 0.1),
                       trigger=(100, 'epoch'))
    elif args.opt == 'adam':
        trainer.extend(extensions.ExponentialShift("alpha", 0.1),
                       trigger=(100, 'epoch'))
    # save snapshot
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_snapshot_{.updater.epoch}'),
        trigger=(10, 'epoch'))
    # log report
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))
    # plot loss graph
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch',
                              file_name='loss.png'))
    # plot acc graph
    trainer.extend(
        extensions.PlotReport(['main/PSNR', 'validation/main/PSNR'],
                              'epoch',
                              file_name='PSNR.png'))
    # print info
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/PSNR',
            'validation/main/PSNR', 'lr', 'elapsed_time'
        ]))
    # print progbar
    trainer.extend(extensions.ProgressBar())

    # [ChainerUI] enable to send commands from ChainerUI
    trainer.extend(CommandsExtension())
    # [ChainerUI] save 'args' to show experimental conditions
    save_args(args, outdir)

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # start train
    trainer.run()

    # save final model
    util.save_trained_model(model_name,
                            model,
                            network_name,
                            f_size=args.fsize,
                            ch=args.ch)
def main():
    """Train a pair of Pix2Pix models (an upscaler and a downscaler) jointly
    with a CycleUpdater, snapshotting and previewing at fixed iteration
    intervals. Experimental conditions are saved for ChainerUI.
    """
    parser = argparse.ArgumentParser(
        description='chainer implementation of pix2pix',
    )
    parser.add_argument(
        '--batchsize',
        '-b',
        type=int,
        default=1,
        help='Number of images in each mini-batch',
    )
    parser.add_argument(
        '--epoch',
        '-e',
        type=int,
        default=200,
        help='Number of sweeps over the dataset to train',
    )
    parser.add_argument(
        '--base_ch',
        type=int,
        default=64,
        help='base channel size of hidden layer',
    )
    parser.add_argument(
        '--gpu',
        '-g',
        type=int,
        default=-1,
        help='GPU ID (negative value indicates CPU)',
    )
    parser.add_argument(
        '--dataset',
        '-i',
        default='./image/fsm',
        help='Directory of image files.',
    )
    parser.add_argument(
        '--out',
        '-o',
        default='result',
        help='Directory to output the result',
    )
    parser.add_argument(
        '--resume',
        '-r',
        default='',
        help='Resume the training from snapshot',
    )
    parser.add_argument(
        '--snapshot_interval',
        type=int,
        default=1000,
        help='Interval of snapshot',
    )
    parser.add_argument(
        '--display_interval',
        type=int,
        default=10,
        help='Interval of displaying log to console',
    )
    parser.add_argument(
        '--preview_interval',
        type=int,
        default=100,
        help='Interval of previewing generated image',
    )
    args = parser.parse_args()

    # [ChainerUI] persist the experimental conditions next to the results
    save_args(args, args.out)

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Two generator/discriminator pairs: one learns to upscale, the other to
    # downscale; they are trained against each other in a cycle.
    upscaler = Pix2Pix(in_ch=4, out_ch=4, base_ch=args.base_ch)
    downscaler = Pix2Pix(in_ch=4, out_ch=4, base_ch=args.base_ch)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        upscaler.to_gpu()
        downscaler.to_gpu()

    # Setup an optimizer (one Adam per sub-network, with weight decay)
    def make_optimizer(model, alpha=0.0002, beta1=0.5):
        optimizer = chainer.optimizers.Adam(alpha=alpha, beta1=beta1)
        optimizer.setup(model)
        optimizer.add_hook(chainer.optimizer.WeightDecay(0.00001), 'hook_dec')
        return optimizer

    opt_gen_up = make_optimizer(upscaler.gen)
    opt_dis_up = make_optimizer(upscaler.dis)
    opt_gen_down = make_optimizer(downscaler.gen)
    opt_dis_down = make_optimizer(downscaler.dis)

    # Datasets: trainA provides auto-upscaled pairs, trainB the 32px singles.
    # The "test" variants reuse the train directories with deterministic
    # (non-random) upscaling.
    train_l_d = AutoUpscaleDataset(
        "{}/trainA".format(args.dataset),
        random_nn=True,
    )
    train_s_d = Single32Dataset("{}/trainB".format(args.dataset), )
    test_l_d = AutoUpscaleDataset(
        "{}/trainA".format(args.dataset),
        random_nn=False,
    )
    test_s_d = Single32Dataset("{}/trainB".format(args.dataset), )
    train_l_iter = chainer.iterators.SerialIterator(train_l_d, args.batchsize)
    test_l_iter = chainer.iterators.SerialIterator(test_l_d, 1)
    train_s_iter = chainer.iterators.SerialIterator(train_s_d, args.batchsize)
    test_s_iter = chainer.iterators.SerialIterator(test_s_d, 1)

    # Set up a trainer
    updater = CycleUpdater(
        upscaler=upscaler,
        downscaler=downscaler,
        iterator={
            'main': train_l_iter,
            'trainB': train_s_iter,
            'testA': test_l_iter,
            'testB': test_s_iter,
        },
        optimizer={
            'gen_up': opt_gen_up,
            'dis_up': opt_dis_up,
            'gen_down': opt_gen_down,
            'dis_down': opt_dis_down,
        },
        device=args.gpu,
    )
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    snapshot_interval = (args.snapshot_interval, 'iteration')
    display_interval = (args.display_interval, 'iteration')
    preview_interval = (args.preview_interval, 'iteration')
    trainer.extend(
        extensions.snapshot(filename='snapshot_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval,
    )
    # Collect the report keys alongside the snapshot registrations so the
    # log/plot/print extensions below stay in sync with what is snapshotted.
    logging_keys = []
    trainer.extend(
        extensions.snapshot_object(upscaler.gen,
                                   'gen_up_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval,
    )
    trainer.extend(
        extensions.snapshot_object(upscaler.dis,
                                   'dis_up_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval,
    )
    logging_keys += [
        'gen_up/loss_adv',
        'gen_up/loss_rec',
        'dis_up/loss_real',
        'dis_up/loss_fake',
    ]
    trainer.extend(
        extensions.snapshot_object(downscaler.gen,
                                   'gen_down_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval,
    )
    trainer.extend(
        extensions.snapshot_object(downscaler.dis,
                                   'dis_down_iter_{.updater.iteration}.npz'),
        trigger=snapshot_interval,
    )
    logging_keys += [
        'gen_down/loss_adv',
        'gen_down/loss_rec',
        'dis_down/loss_real',
        'dis_down/loss_fake',
    ]
    trainer.extend(extensions.LogReport(trigger=preview_interval))
    trainer.extend(
        extensions.PlotReport(
            logging_keys,
            trigger=preview_interval,
        ))
    trainer.extend(extensions.PrintReport(['epoch', 'iteration'] +
                                          logging_keys, ),
                   trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    # Preview: render 8 images through the up/down generators.
    trainer.extend(out_image_cycle(upscaler.gen, downscaler.gen, 8, args.out),
                   trigger=preview_interval)
    # [ChainerUI] enable to send commands from ChainerUI
    trainer.extend(CommandsExtension())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    """Train a ResidualDenseNet for spectral super-resolution (PIRM track 1).

    Parses CLI flags (string booleans and comma-separated lists included),
    builds the model wrapped in a SuperResolution train chain, and runs a
    standard Chainer training loop with periodic evaluation and snapshots.
    """
    parser = argparse.ArgumentParser(description='ChainerMN example: MNIST')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=20,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learning_rate',
                        type=float,
                        default=1e-4,
                        help="learning rate.")
    # comma-separated band indices, parsed into a list of ints below
    parser.add_argument('--bands',
                        type=str,
                        default="0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13")
    parser.add_argument('--patchsize', type=int, default=64)
    parser.add_argument(
        '--image_concat',
        type=float,
        default=0,
        help='vertial concat augmentation happenning ratio, in [0, 1].')
    parser.add_argument('--mixup', type=float, default=0)
    # comma-separated loss weights, parsed into floats below
    parser.add_argument('--loss_coeffs',
                        type=str,
                        default="1, 1, 0, 0",
                        help="(coeff for MAE, MSE, MRAE and SID.")
    parser.add_argument(
        '--n_feats',
        type=int,
        default=256,
        help='the number of kernels in the convlution just after\
 the concats in each dense block.'
    )
    parser.add_argument('--n_RDBs',
                        type=int,
                        default=20,
                        help='The number of RDB units.')
    parser.add_argument('--n_denselayers',
                        type=int,
                        default=6,
                        help='The number of layers in each RDB.')
    # NOTE(review): no default — None is forwarded to ResidualDenseNet;
    # confirm the model handles growth_rate=None.
    parser.add_argument('--growth_rate', type=int)
    parser.add_argument('--res_scale', type=float, default=1.)
    # string-valued booleans ("True"/"False"), validated below
    parser.add_argument('--last_relu', type=str, default="True")
    parser.add_argument('--calc_sid', type=str, default="True")
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=1000,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--out',
                        '-o',
                        default='result_t1',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--device_index', type=int, default=0)
    args = parser.parse_args()
    # [ChainerUI] save experimental conditions
    save_args(args, args.out)

    bands = list(map(int, args.bands.split(',')))
    loss_coeffs = list(map(float, args.loss_coeffs.split(',')))
    # Convert the "True"/"False" string flags to real booleans, rejecting
    # anything else explicitly.
    if args.last_relu == "True":
        last_relu = True
    elif args.last_relu == "False":
        last_relu = False
    else:
        raise ValueError("argument last_relu must be 'True' or 'False'")
    if args.calc_sid == "True":
        calc_sid = True
    elif args.calc_sid == "False":
        calc_sid = False
    else:
        raise ValueError("argument calc_sid must be 'True' or 'False'")

    print('==========================================')
    if args.device_index >= 0:
        print('Using GPU {}'.format(args.device_index))
    else:
        print('Using CPU')
    print('target bands are: {}'.format(bands))
    print('augmentations: image_concat: {}, mixup: {}'.format(
        args.image_concat, args.mixup))
    print('ratios of losses mae={}, mse={}, mrae={}, sid={}'\
        .format(loss_coeffs[0], loss_coeffs[1], loss_coeffs[2],
                loss_coeffs[3]))
    print('Num Minibatch-size: {}'.format(args.batchsize))
    print('Num epoch: {}'.format(args.epoch))
    print('==========================================')

    device_id = args.device_index
    model = models.ResidualDenseNet(scale=3,
                                    in_channels=14,
                                    out_channels=len(bands),
                                    n_feats=args.n_feats,
                                    n_RDBs=args.n_RDBs,
                                    n_denselayers=args.n_denselayers,
                                    growth_rate=args.growth_rate,
                                    res_scale=args.res_scale)
    # NOTE(review): args.resume is loaded twice — into the bare predictor here
    # and into model.predictor again just before trainer.run(); confirm the
    # duplication is intentional.
    if len(args.resume) > 0:
        chainer.serializers.load_npz(args.resume, model)
    model = super_resolution.SuperResolution(model,
                                             device=args.device_index,
                                             loss_coeffs=loss_coeffs,
                                             last_relu=last_relu,
                                             calc_sid=calc_sid)
    if device_id >= 0:
        model.to_gpu(device_id)

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainer.optimizers.Adam(args.learning_rate)
    optimizer.setup(model)

    # 200 training scenes / 20 validation scenes, loaded from pre-normalized
    # .npy files; augmentation is only applied to the training split.
    train = datasets.T1Dataset(list(range(200)),
                               'data/PIRMt1/normalized/train_lr3.npy',
                               train_target,
                               patchsize=args.patchsize,
                               scale=3,
                               train=True,
                               image_concat=args.image_concat,
                               mixup=args.mixup,
                               target_bands=bands)
    validation = datasets.T1Dataset(list(range(20)),
                                    'data/PIRMt1/normalized/val_lr3.npy',
                                    val_target,
                                    patchsize=64,
                                    scale=3,
                                    train=False,
                                    target_bands=bands)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val_iter = chainer.iterators.SerialIterator(validation,
                                                args.batchsize,
                                                repeat=False,
                                                shuffle=False)
    updater = training.StandardUpdater(train_iter, optimizer, device=device_id)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Create a multi node evaluator from a standard Chainer evaluator.
    evaluator = extensions.Evaluator(val_iter, model, device=device_id)
    trainer.extend(evaluator, trigger=(25, 'epoch'))
    trainer.extend(extensions.dump_graph('main/loss'))
    # Decay Adam's alpha by 0.8 every 50 epochs.
    trainer.extend(extensions.ExponentialShift('alpha', 0.8),
                   trigger=training.triggers.IntervalTrigger(50, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport([
        'epoch', 'main/MAE', 'main/MSE', 'main/MRAE', 'main/loss', 'main/SID',
        'validation/main/MAE', 'validation/main/MSE', 'validation/main/MRAE',
        'validation/main/SID', 'validation/main/loss', 'elapsed_time'
    ]),
                   trigger=(1, 'epoch'))
    trainer.extend(extensions.ProgressBar())
    trainer.extend(extensions.snapshot_object(
        model.predictor, 't1_denseresnet_{.updater.epoch}.npz'),
                   trigger=(50, 'epoch'))
    if args.resume:
        chainer.serializers.load_npz(args.resume, model.predictor)
    trainer.run()
def main():
    """Train a seq2seq conversation model.

    Builds the vocabulary and the input/output sequence dataset, trains a
    Seq2seq network, and saves the final model under a result directory whose
    name encodes the main hyper-parameters.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--vocab',
                        '-v',
                        type=str,
                        default='conversation_corpus/vocab.txt')
    parser.add_argument('--seq_in',
                        '-i',
                        type=str,
                        default='conversation_corpus/input_sequence.txt')
    parser.add_argument('--seq_out',
                        '-o',
                        type=str,
                        default='conversation_corpus/output_sequence.txt')
    parser.add_argument('--epoch', '-e', type=int, default=100)
    parser.add_argument('--log_epoch', type=int, default=1)
    parser.add_argument('--alpha', '-a', type=float, default=0.001)
    parser.add_argument('--gpu', '-g', type=int, default=0)
    parser.add_argument('--batch', '-b', type=int, default=64)
    parser.add_argument('--layer', '-l', type=int, default=3)
    parser.add_argument('--unit', '-u', type=int, default=256)
    parser.add_argument('--lr_shift', '-s', action='store_true', default=False)
    parser.add_argument('--resume', '-r', default='')
    args = parser.parse_args()

    # save directory: encodes epoch/layer/unit/vocab-name so runs don't clash
    outdir = path.join(
        ROOT_PATH,
        'seq2seq_results/seq2seq_conversation_epoch_{}_layer_{}_unit_{}_vocab_{}'
        .format(args.epoch, args.layer, args.unit,
                args.vocab.strip().split('/')[-1].split('.')[0]))
    if not path.exists(outdir):
        os.makedirs(outdir)
    # dump all CLI parameters as plain text for later reference
    with open(path.join(outdir, 'arg_param.txt'), 'w') as f:
        for k, v in args.__dict__.items():
            f.write('{}:{}\n'.format(k, v))

    # print param
    print('# GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batch))
    print('# Epoch: {}'.format(args.epoch))
    print('# Adam alpha: {}'.format(args.alpha))
    print('# embedID unit :{}'.format(args.unit))
    print('# LSTM layer :{}'.format(args.layer))
    print('# out directory :{}'.format(outdir))
    print('# lr shift: {}'.format(args.lr_shift))
    print('')

    # load dataset
    vocab_ids = load_vocab(args.vocab)
    train_data = load_data(vocab_ids, args.seq_in, args.seq_out)

    # prepare model
    model = Seq2seq(n_layers=args.layer,
                    n_vocab=len(vocab_ids),
                    n_units=args.unit)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # optimizer
    optimizer = chainer.optimizers.Adam(alpha=args.alpha)
    optimizer.setup(model)

    # iter
    train_iter = chainer.iterators.SerialIterator(train_data, args.batch)

    # trainer
    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       converter=convert,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)

    # extension
    # lr shift: optionally decay Adam's alpha by 10x every 200 epochs
    if args.lr_shift:
        trainer.extend(extensions.ExponentialShift("alpha", 0.1),
                       trigger=(200, 'epoch'))
    # log
    trainer.extend(extensions.LogReport(trigger=(args.log_epoch, 'epoch')))
    trainer.extend(extensions.observe_lr(), trigger=(args.log_epoch, 'epoch'))
    # print info
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss', 'main/perp', 'lr',
         'elapsed_time']),
                   trigger=(args.log_epoch, 'epoch'))
    # print progbar
    trainer.extend(extensions.ProgressBar())
    # plot loss graph
    trainer.extend(
        extensions.PlotReport(['main/loss'], 'epoch', file_name='loss.png'))
    # save snapshot and model
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_snapshot_{.updater.epoch}'),
                   trigger=(10, 'epoch'))
    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)
    # [ChainerUI] enable to send commands from ChainerUI
    trainer.extend(CommandsExtension())
    # [ChainerUI] save 'args' to show experimental conditions
    save_args(args, outdir)

    # start learn
    print('start training')
    trainer.run()

    # save final model
    chainer.serializers.save_npz(
        path.join(outdir, "seq2seq_conversation_model.npz"), model)
def main():
    """Train a seq2seq translation model with optional validation/test BLEU.

    Loads vocabularies and parallel corpora, filters sentence pairs by
    length, and trains with Adam + L2 weight decay. If validation/test file
    pairs are given, BLEU calculation and evaluators are attached. The
    trainer state and final model are saved under --out.
    """
    parser = argparse.ArgumentParser(
        description='Chainer example: seq2seq',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('SOURCE', help='source sentence list')
    parser.add_argument('TARGET', help='target sentence list')
    parser.add_argument('SOURCE_VOCAB', help='source vocabulary file')
    parser.add_argument('TARGET_VOCAB', help='target vocabulary file')
    parser.add_argument('--validation-source',
                        help='source sentence list for validation')
    parser.add_argument('--validation-target',
                        help='target sentence list for validation')
    parser.add_argument('--test-source', help='source sentence list for test')
    parser.add_argument('--test-target', help='target sentence list for test')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=64,
                        help='number of sentence pairs in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=20,
                        help='number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='resume the training from snapshot')
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=1024,
                        help='number of units')
    parser.add_argument('--layer',
                        '-l',
                        type=int,
                        default=3,
                        help='number of layers')
    parser.add_argument('--seed', type=int, default=0, help='random seed')
    parser.add_argument('--min-source-sentence',
                        type=int,
                        default=1,
                        help='minimium length of source sentence')
    parser.add_argument('--max-source-sentence',
                        type=int,
                        default=50,
                        help='maximum length of source sentence')
    parser.add_argument('--min-target-sentence',
                        type=int,
                        default=1,
                        help='minimium length of target sentence')
    parser.add_argument('--max-target-sentence',
                        type=int,
                        default=50,
                        help='maximum length of target sentence')
    parser.add_argument('--log-interval',
                        type=int,
                        default=200,
                        help='number of iteration to show log')
    parser.add_argument('--validation-interval',
                        type=int,
                        default=4000,
                        help='number of iteration to evlauate the model '
                        'with validation dataset')
    parser.add_argument('--test-interval',
                        type=int,
                        default=200,
                        help='number of iteration to evlauate the model '
                        'with test dataset')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='directory to output the result')
    parser.add_argument('--l2',
                        '-l2',
                        type=float,
                        default=0.001,
                        help='number of l2 arg')
    args = parser.parse_args()

    reset_seed(args.seed)

    # Load pre-processed dataset
    source_ids = load_vocabulary(args.SOURCE_VOCAB)
    target_ids = load_vocabulary(args.TARGET_VOCAB)
    train_source = load_data(source_ids, args.SOURCE)
    train_target = load_data(target_ids, args.TARGET)
    assert len(train_source) == len(train_target)
    # Keep only sentence pairs within the configured length bounds.
    train_data = [
        (s, t) for s, t in six.moves.zip(train_source, train_target)
        if (args.min_source_sentence <= len(s) <= args.max_source_sentence
            and args.min_target_sentence <= len(t) <= args.max_target_sentence)
    ]
    train_source_unknown = calculate_unknown_ratio([s for s, _ in train_data])
    train_target_unknown = calculate_unknown_ratio([t for _, t in train_data])

    print('Source vocabulary size: %d' % len(source_ids))
    print('Target vocabulary size: %d' % len(target_ids))
    print('Train data size: %d' % len(train_data))
    print('Train source unknown ratio: %.2f%%' % (train_source_unknown * 100))
    print('Train target unknown ratio: %.2f%%' % (train_target_unknown * 100))

    # Inverse vocabularies (id -> word) for printing sample translations.
    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    # Setup model
    model = Seq2seq(args.layer, len(source_ids), len(target_ids), args.unit)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device(args.gpu).use()
        model.to_gpu(args.gpu)

    # Setup optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(args.l2))

    # Setup iterator
    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)

    # Setup updater and trainer
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                converter=convert,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(
        extensions.LogReport(trigger=(args.log_interval, 'iteration')))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/perp', 'validation/main/perp', 'validation/main/bleu',
        'elapsed_time'
    ]),
                   trigger=(args.log_interval, 'iteration'))
    #trainer.extend(extensions.snapshot())

    if args.validation_source and args.validation_target:
        valid_source = load_data(source_ids, args.validation_source)
        valid_target = load_data(target_ids, args.validation_target)
        assert len(valid_source) == len(valid_target)
        valid_data = list(six.moves.zip(valid_source, valid_target))
        # Drop empty sentences only (no length upper bound for validation).
        valid_data = [(s, t) for s, t in valid_data
                      if 0 < len(s) and 0 < len(t)]
        valid_source_unknown = calculate_unknown_ratio(
            [s for s, _ in valid_data])
        valid_target_unknown = calculate_unknown_ratio(
            [t for _, t in valid_data])
        print('Validation data: %d' % len(valid_data))
        print('Validation source unknown ratio: %.2f%%' %
              (valid_source_unknown * 100))
        print('Validation target unknown ratio: %.2f%%' %
              (valid_target_unknown * 100))

        @chainer.training.make_extension()
        def translate(trainer):
            # Print one randomly chosen validation translation for eyeballing.
            source, target = valid_data[numpy.random.choice(len(valid_data))]
            result = model.translate([model.xp.array(source)])[0]
            source_sentence = ' '.join([source_words[x] for x in source])
            target_sentence = ' '.join([target_words[y] for y in target])
            result_sentence = ' '.join([target_words[y] for y in result])
            print('# source : ' + source_sentence)
            print('# result : ' + result_sentence)
            print('# expect : ' + target_sentence)

        trainer.extend(translate,
                       trigger=(args.validation_interval, 'iteration'))
        trainer.extend(CalculateBleu(model,
                                     valid_data,
                                     'validation/main/bleu',
                                     device=args.gpu),
                       trigger=(args.validation_interval, 'iteration'))
        dev_iter = chainer.iterators.SerialIterator(valid_data,
                                                    args.batchsize,
                                                    repeat=False,
                                                    shuffle=False)
        dev_eval = extensions.Evaluator(dev_iter,
                                        model,
                                        device=args.gpu,
                                        converter=convert)
        dev_eval.name = 'valid'
        trainer.extend(dev_eval,
                       trigger=(args.validation_interval, 'iteration'))

    if args.test_source and args.test_target:
        test_source = load_data(source_ids, args.test_source)
        test_target = load_data(target_ids, args.test_target)
        assert len(test_source) == len(test_target)
        test_data = list(six.moves.zip(test_source, test_target))
        test_data = [(s, t) for s, t in test_data if 0 < len(s) and 0 < len(t)]
        # NOTE(review): these unknown ratios are computed but never printed.
        test_source_unknown = calculate_unknown_ratio(
            [s for s, _ in test_data])
        test_target_unknown = calculate_unknown_ratio(
            [t for _, t in test_data])
        trainer.extend(CalculateBleu(model,
                                     test_data,
                                     'test/main/bleu',
                                     device=args.gpu),
                       trigger=(args.test_interval, 'iteration'))
        test_iter = chainer.iterators.SerialIterator(test_data,
                                                     args.batchsize,
                                                     repeat=False,
                                                     shuffle=False)
        test_eval = extensions.Evaluator(test_iter,
                                         model,
                                         device=args.gpu,
                                         converter=convert)
        test_eval.name = 'test'
        # NOTE(review): the test evaluator is triggered on the *validation*
        # interval although a --test-interval flag exists — confirm intent.
        trainer.extend(test_eval,
                       trigger=(args.validation_interval, 'iteration'))

    # [ChainerUI] save 'args' to show experimental conditions
    save_args(args, args.out)
    print('start training')
    trainer.run()
    # chainer.serializers.save_npz("save.npz",model)

    # Save a snapshot
    chainer.serializers.save_npz(args.out + "/trainer.npz", trainer)
    chainer.serializers.save_npz(args.out + "/model.npz", model)
def main():
    """Train a VAE (net.VAE) on MNIST and log reconstruction previews.

    Every 5 epochs an extension renders three panels to the ChainerUI
    summary: reconstructions of fixed train digits, reconstructions of fixed
    test digits, and decodings of z ~ N(0, I).

    Fix: the preview extension previously computed ``x1_test = model(x)``,
    re-encoding the *train* batch, so the 'test_reconstructed' panel never
    actually showed test reconstructions. It now encodes ``x_test``.
    """
    parser = argparse.ArgumentParser(description='Chainer example: VAE')
    parser.add_argument('--initmodel',
                        '-m',
                        default='',
                        help='Initialize the model from given file')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the optimization from snapshot')
    parser.add_argument('--gpu',
                        '-g',
                        default=-1,
                        type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--epoch',
                        '-e',
                        default=100,
                        type=int,
                        help='number of epochs to learn')
    parser.add_argument('--dimz',
                        '-z',
                        default=20,
                        type=int,
                        help='dimention of encoded vector')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=100,
                        help='learning minibatch size')
    parser.add_argument('--test',
                        action='store_true',
                        help='Use tiny datasets for quick tests')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# dim z: {}'.format(args.dimz))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Prepare VAE model, defined in net.py
    model = net.VAE(784, args.dimz, 500)

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Initialize the model from a previously saved file, if requested.
    if args.initmodel:
        chainer.serializers.load_npz(args.initmodel, model)

    # Load the MNIST dataset (unlabeled: the VAE reconstructs its input).
    train, test = chainer.datasets.get_mnist(withlabel=False)
    if args.test:
        # Tiny subsets for a quick smoke-test run.
        train, _ = chainer.datasets.split_dataset(train, 100)
        test, _ = chainer.datasets.split_dataset(test, 100)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # Set up an updater. StandardUpdater can explicitly specify a loss
    # function used in the training with the 'loss_func' option.
    updater = training.updaters.StandardUpdater(
        train_iter,
        optimizer,
        device=args.gpu,
        loss_func=model.get_loss_func())

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(
        extensions.Evaluator(test_iter,
                             model,
                             device=args.gpu,
                             eval_func=model.get_loss_func(k=10)))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/rec_loss',
            'validation/main/rec_loss', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    @chainer.training.make_extension()
    def out_generated_image(trainer):
        # Reconstruct a fixed set of train digits.
        train_ind = [1, 3, 5, 10, 2, 0, 13, 15, 17]
        x = chainer.Variable(np.asarray(train[train_ind]))
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            x1 = model(x)

        # Reconstruct a fixed set of test digits.
        test_ind = [3, 2, 1, 18, 4, 8, 11, 17, 61]
        x_test = chainer.Variable(np.asarray(test[test_ind]))
        with chainer.using_config('train', False), chainer.no_backprop_mode():
            # BUG FIX: was `model(x)`, which reconstructed the train batch
            # again instead of the test batch.
            x1_test = model(x_test)

        # draw images from randomly sampled z
        z = chainer.Variable(
            np.random.normal(0, 1, (9, args.dimz)).astype(np.float32))
        x_sampled = model.decode(z)

        epoch = trainer.updater.epoch
        iteration = trainer.updater.iteration
        with summary.reporter(epoch=epoch, iteration=iteration) as r:
            r.image(x.reshape(len(train_ind), 28, 28), 'train', row=3)
            r.image(x1.reshape(len(train_ind), 28, 28),
                    'train_reconstructed',
                    row=3)
            r.image(x_test.reshape(len(test_ind), 28, 28), 'test', row=3)
            r.image(x1_test.reshape(len(test_ind), 28, 28),
                    'test_reconstructed',
                    row=3)
            r.image(x_sampled.reshape(9, 28, 28), 'sampled', row=3)

    trainer.extend(out_generated_image, trigger=(5, 'epoch'))
    summary.set_out(args.out)
    # [ChainerUI] save 'args' to show experimental conditions
    save_args(args, args.out)

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    """Train a mean-teacher semi-supervised classifier on CIFAR.

    For CIFAR-10: 10000 train images are held out as an observable
    validation set ('split'), and the 10000 test images are re-labeled -1
    and concatenated into training as unlabeled data.

    Fixes:
    * the default ``--model pyramid`` branch never created ``predictor2``
      (the teacher network), crashing with NameError further down;
    * ``--consistent_weight`` had no ``type=``, so command-line values
      reached MeanTeacherTrainChain as strings;
    * ``--model``'s help text was a copy-paste of the augmentation help.
    """
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--seed',
                        '-s',
                        type=int,
                        default=0,
                        help='seed for random values')
    parser.add_argument('--dataset',
                        '-d',
                        default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate',
                        '-l',
                        type=float,
                        default=0.01,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--aug_method',
                        '-a',
                        default='random_erasing',
                        choices=['none', 'mixup', 'random_erasing', 'both'],
                        help='data augmentation strategy')
    parser.add_argument('--model',
                        '-m',
                        default='pyramid',
                        choices=['resnet50', 'pyramid'],
                        help='model architecture')
    parser.add_argument('--weights', '-w', default='', help='initial weight')
    # FIX: parse as float — the value is a loss coefficient; without type=
    # a command-line override arrived as str.
    parser.add_argument('--consistent_weight',
                        type=float,
                        default=10,
                        help='weight of the consistency loss')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print(args)
    print('')

    set_random_seed(args.seed)

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        # Hold out 10000 train images as the observable validation set
        # ('split'); turn the test images into unlabeled data (label = -1).
        # Questions this experiment answers:
        # 1. Does accuracy on the real test set improve?
        # 2. Does 'split' accuracy track that improvement?
        train, test = get_cifar10()
        split = train[-10000:]
        train = train[:-10000]
        unlabeled = [(x[0], -1) for x in test]
        print(
            f'train:{len(train)}, unlabeled:{len(unlabeled)}, test:{len(test)}'
        )
        train = chainer.datasets.ConcatenatedDataset(train, unlabeled)
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        # NOTE(review): this branch defines neither 'split' nor 'unlabeled',
        # so split_iter below raises NameError for cifar100 — confirm whether
        # this path is still supported.
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    # Student ('predictor') and teacher ('predictor2') networks.
    if args.model == 'resnet50':
        predictor = ResNet(None)
        predictor.fc6 = L.Linear(2048, class_labels)
        predictor.fc6.name = 'fc6'
        predictor2 = ResNet(None)
        predictor2.fc6 = L.Linear(2048, class_labels)
        predictor2.fc6.name = 'fc6'
    elif args.model == 'pyramid':
        predictor = shaked_pyramid_net.PyramidNet(skip=True)
        # BUG FIX: the teacher network was never created on this branch, so
        # the default --model pyramid crashed with NameError on predictor2.
        predictor2 = shaked_pyramid_net.PyramidNet(skip=True)

    if not args.weights == '':
        # Initialize both student and teacher from the same checkpoint.
        print(f'loading weights from {args.weights}')
        chainer.serializers.load_npz(args.weights, predictor)
        chainer.serializers.load_npz(args.weights, predictor2)

    model = mean_teacher_train_chain.MeanTeacherTrainChain(
        predictor, predictor2, args.consistent_weight)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU
        model.teacher.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    # augment train data
    print('currently, aug_method is ignored')
    train = dataset.SingleCifar10((train, None))
    train = chainer.datasets.transform_dataset.TransformDataset(
        train, transformer.LessonTransform(crop_size=(32, 32)))

    train_iter = chainer.iterators.SerialIterator(train,
                                                  args.batchsize,
                                                  shuffle=True)
    split_iter = chainer.iterators.SerialIterator(split,
                                                  args.batchsize,
                                                  repeat=False,
                                                  shuffle=False)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Let the train chain refresh its teacher after every update.
    def update_teacher(trainer):
        model.on_update_finished(trainer)

    trainer.extend(update_teacher)

    # Evaluate the teacher with both validation sets every epoch.
    eval_trigger = (1, 'epoch')
    classifier = chainer.links.Classifier(model.teacher)
    split_evaluator = extensions.Evaluator(split_iter,
                                           classifier,
                                           device=args.gpu)
    split_evaluator.name = 'observable_validation'
    trainer.extend(split_evaluator, trigger=eval_trigger)
    truth_evaluator = extensions.Evaluator(test_iter,
                                           classifier,
                                           device=args.gpu)
    truth_evaluator.name = 'truth_validation'
    trainer.extend(truth_evaluator, trigger=eval_trigger)

    # Drop the learning rate at 50% and 75% of the schedule.
    lr_drop_epoch = [int(args.epoch * 0.5), int(args.epoch * 0.75)]
    lr_drop_ratio = 0.1
    print(f'lr schedule: {lr_drop_ratio}, timing: {lr_drop_epoch}')

    def lr_drop(trainer):
        trainer.updater.get_optimizer('main').lr *= lr_drop_ratio

    trainer.extend(lr_drop,
                   trigger=chainer.training.triggers.ManualScheduleTrigger(
                       lr_drop_epoch, 'epoch'))
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Keep the best model by each validation accuracy.
    #trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.snapshot_object(model,
                                              'observable_best_accuracy.npz'),
                   trigger=chainer.training.triggers.MaxValueTrigger(
                       'observable_validation/main/accuracy'))
    trainer.extend(extensions.snapshot_object(model,
                                              'truth_best_accuracy.npz'),
                   trigger=chainer.training.triggers.MaxValueTrigger(
                       'truth_validation/main/accuracy'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'lr', 'main/class_loss', 'main/consistency_loss',
            'main/loss', 'main/teacher_accuracy', 'main/student_accuracy',
            'observable_validation/main/loss',
            'observable_validation/main/accuracy',
            'truth_validation/main/accuracy', 'truth_validation/main/loss',
            'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # interact with chainerui
    trainer.extend(CommandsExtension(), trigger=(100, 'iteration'))
    # save args
    save_args(args, args.out)

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    """Entry point: iterative CT reconstruction with a trainable seed image,
    optional encoder/decoder generator and discriminator.

    Reads all settings from ``arguments()``; builds datasets, OSEM-split
    projection matrices, optimizers and a Chainer Trainer, then runs training.
    """
    args = arguments()
    chainer.config.autotune = True
    chainer.print_runtime_info()
    print(args)
    # Select the network implementation; "dp" presumably means a
    # depthwise/parallel variant — TODO confirm against net_dp.py.
    if args.dp:
        from net_dp import Encoder, Decoder
    else:
        from net import Encoder, Decoder
    # xp/sp abstract over NumPy/SciPy (CPU) vs CuPy/cupyx (GPU) so the rest
    # of the function is device-agnostic.
    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        xp = cuda.cupy
        sp = cupyx.scipy.sparse
    else:
        print("runs desperately slowly without a GPU!")
        xp = np
        sp = scipy.sparse

    ## Input information ##
    # InputFile = scanconf.ScanConfig()
    # InputFile.reconSize = args.crop_width

    ## setup trainable links
    decoder = Decoder(args)
    # When a component is disabled, a dummy 1-unit Linear link stands in so
    # optimizers/snapshots below can treat all three links uniformly.
    if args.use_enc:
        encoder = Encoder(args)
    else:
        encoder = L.Linear(1)
    if args.use_dis:
        dis = Discriminator(args)
    else:
        dis = L.Linear(1)
    if args.model_dis:
        serializers.load_npz(args.model_dis, dis)
        print('discriminator model loaded: {}'.format(args.model_dis))
    if args.model_gen:
        # Encoder weights are only loaded when the filename contains 'enc';
        # the decoder filename is derived by the enc->dec substitution.
        if 'enc' in args.model_gen and not args.decoder_only:
            serializers.load_npz(args.model_gen, encoder)
            print('encoder model loaded: {}'.format(args.model_gen))
        serializers.load_npz(args.model_gen.replace('enc', 'dec'), decoder)
        print('decoder model loaded: {}'.format(
            args.model_gen.replace('enc', 'dec')))
    if args.lambda_sd > 0 and args.lr_sd < 0.05:
        print(
            "\n\n for usual iterative reconstruction (-ls), --lr_sd should be around 0.1. \n\n"
        )
    # Initial seed image: latent vector, decoder latent tensor, or a full
    # image-sized tensor depending on the configuration.
    if args.latent_dim > 0:
        init = xp.zeros((args.batchsize, args.latent_dim)).astype(np.float32)
    elif args.decoder_only:
        init = xp.zeros((args.batchsize, decoder.latent_c, decoder.latent_h,
                         decoder.latent_w)).astype(np.float32)
    else:
        init = xp.zeros((args.batchsize, 1, args.crop_height,
                         args.crop_width)).astype(np.float32)
    # init = xp.random.uniform(-0.1,0.1,(1,1,args.crop_height,args.crop_width)).astype(np.float32)
    print("Initial image {} shape {}".format(args.model_image, init.shape))
    # The seed itself is a trainable Parameter link (optimised like weights).
    seed = L.Parameter(init)
    if args.gpu >= 0:
        decoder.to_gpu()
        seed.to_gpu()
        encoder.to_gpu()
        dis.to_gpu()

    # setup optimisers
    def make_optimizer(model, lr, opttype='Adam'):
        """Build an optimizer of the given type with optional weight decay."""
        # eps = 1e-5 if args.dtype==np.float16 else 1e-8
        optimizer = optim[opttype](lr)
        #from profiled_optimizer import create_marked_profile_optimizer
        # optimizer = create_marked_profile_optimizer(optim[opttype](lr), sync=True, sync_level=2)
        optimizer.setup(model)
        if args.weight_decay > 0:
            # Adam-family optimizers take decoupled decay via the attribute;
            # others get a classic WeightDecay gradient hook.
            if opttype in ['Adam', 'Adam_d', 'AdaBound', 'Eve']:
                optimizer.weight_decay_rate = args.weight_decay
            else:
                optimizer.add_hook(
                    chainer.optimizer_hooks.WeightDecay(args.weight_decay))
        # optimizer.add_hook(chainer.optimizer_hooks.GradientClipping(100))
        return optimizer

    optimizer_sd = make_optimizer(seed, args.lr_sd, args.optimizer)
    optimizer_dec = make_optimizer(decoder, args.lr_gen, args.optimizer)
    optimizer_enc = make_optimizer(encoder, args.lr_gen, args.optimizer)
    optimizer_dis = make_optimizer(dis, args.lr_dis, args.optimizer_dis)

    # unify CPU and GPU memory to load big matrices
    if args.unified_memory_pool and args.crop_height > 256:
        pool = cp.cuda.MemoryPool(cp.cuda.malloc_managed)
        cp.cuda.set_allocator(pool.malloc)

    # projection matrices
    prMats, conjMats = None, None
    if args.lambda_sd > 0 or args.lambda_nn > 0:
        prMat = scipy.sparse.load_npz(
            os.path.join(args.root, args.projection_matrix)).tocsr(copy=False)
        # cx = prMat.tocsr()
        # rows,cols = cx.nonzero()
        # for i,j in zip(rows,cols):
        #     if cx[i,j] < 1e-5:
        #         cx[i,j] = 0
        # prMat = cx.tocoo()
        # scipy.sparse.save_npz("d:/ml/reconst/pr.npz",prMat)
        # Split the projection matrix row-wise into args.osem interleaved
        # subsets (ordered-subsets EM style), as Chainer CooMatrix objects.
        prMats = [
            sp.coo_matrix((prMat[np.arange(i, prMat.shape[0], args.osem), :]),
                          dtype=np.float32) for i in range(args.osem)
        ]
        prMats = [
            chainer.utils.CooMatrix(p.data, p.row, p.col, p.shape)
            for p in prMats
        ]
        print("Projection matrix {} shape {}, thinned {} x {}".format(
            args.projection_matrix, prMat.shape, prMats[0].shape, len(prMats)))
        if args.system_matrix:
            # Optional conjugate/system matrix, split the same way.
            conjMat = scipy.sparse.load_npz(
                os.path.join(args.root, args.system_matrix)).tocsr(copy=False)
            conjMats = [
                sp.coo_matrix(
                    (conjMat[np.arange(i, conjMat.shape[0], args.osem), :]),
                    dtype=np.float32) for i in range(args.osem)
            ]
            conjMats = [
                chainer.utils.CooMatrix(p.data, p.row, p.col, p.shape)
                for p in conjMats
            ]
            # conjMat = sp.coo_matrix(conjMat, dtype = np.float32)
            # conjMat = chainer.utils.CooMatrix(conjMat.data, conjMat.row, conjMat.col, conjMat.shape)
            print("Conjugate matrix {} shape {}, thinned {} x {}".format(
                args.system_matrix, conjMat.shape, conjMats[0].shape,
                len(conjMats)))

    # setup updater
    print("Setting up data iterators...")
    planct_dataset = Dataset(path=args.planct_dir,
                             baseA=args.HU_base,
                             rangeA=args.HU_range,
                             crop=(args.crop_height, args.crop_width),
                             scale_to=args.scale_to,
                             random=args.random_translate)
    planct_iter = chainer.iterators.SerialIterator(planct_dataset,
                                                   args.batchsize,
                                                   shuffle=True)
    mvct_dataset = Dataset(path=args.mvct_dir,
                           baseA=args.HU_base,
                           rangeA=args.HU_range,
                           crop=(args.crop_height, args.crop_width),
                           scale_to=args.scale_to,
                           random=args.random_translate)
    mvct_iter = chainer.iterators.SerialIterator(mvct_dataset,
                                                 args.batchsize,
                                                 shuffle=True)
    data = prjData(args.sinogram, osem=args.osem)
    # Sinogram order matters for OSEM subsets, hence shuffle=False.
    proj_iter = chainer.iterators.SerialIterator(data,
                                                 args.batchsize,
                                                 shuffle=False)  # True
    updater = Updater(models=(seed, encoder, decoder, dis),
                      iterator={
                          'main': proj_iter,
                          'planct': planct_iter,
                          'mvct': mvct_iter
                      },
                      optimizer={
                          'main': optimizer_sd,
                          'enc': optimizer_enc,
                          'dec': optimizer_dec,
                          'dis': optimizer_dis
                      },
                      device=args.gpu,
                      params={
                          'args': args,
                          'prMats': prMats,
                          'conjMats': conjMats
                      })

    # logging
    # Negative --epoch is interpreted as "epochs over the sinogram dataset";
    # positive means a fixed number of (epoch * iter) iterations.
    if args.epoch < 0:
        total_iter = -args.epoch * args.iter * math.ceil(
            len(data) / args.batchsize)
    else:
        total_iter = args.epoch * args.iter
    trainer = training.Trainer(updater, (total_iter, 'iteration'),
                               out=args.out)
    log_interval = (50, 'iteration')
    log_keys_main = []
    log_keys_dis = []
    log_keys_grad = [
        'main/grad_sd', 'main/grad_gen', 'main/grad_sd_consistency',
        'main/grad_gen_consistency', 'main/seed_diff'
    ]
    # Only report loss terms whose weight is active (> 0).
    loss_main_list = [(args.lambda_sd, 'main/loss_sd'),
                      (args.lambda_nn, 'main/loss_nn'),
                      (args.lambda_ae1, 'main/loss_ae1'),
                      (args.lambda_ae2, 'main/loss_ae2'),
                      (args.lambda_tv, 'main/loss_tv'),
                      (args.lambda_tvs, 'main/loss_tvs'),
                      (args.lambda_reg, 'main/loss_reg'),
                      (args.lambda_reg, 'main/loss_reg_ae')]
    for a, k in loss_main_list:
        if a > 0:
            log_keys_main.append(k)
    loss_dis_list = [(args.lambda_adv, 'main/loss_adv'),
                     (args.lambda_advs, 'main/loss_advs'),
                     (args.dis_freq, 'main/loss_dis'),
                     (args.lambda_gan, 'main/loss_gan')]
    for a, k in loss_dis_list:
        if a > 0:
            log_keys_dis.append(k)
    log_keys = ['iteration'] + log_keys_main + log_keys_dis + log_keys_grad
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.LogReport(keys=log_keys, trigger=log_interval))
    trainer.extend(extensions.PrintReport(log_keys), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(log_keys_main,
                                  'iteration',
                                  trigger=(100, 'iteration'),
                                  file_name='loss.png',
                                  postprocess=plot_log))
        trainer.extend(
            extensions.PlotReport(log_keys_dis,
                                  'iteration',
                                  trigger=(100, 'iteration'),
                                  file_name='loss_dis.png'))
        trainer.extend(
            extensions.PlotReport(log_keys_grad,
                                  'iteration',
                                  trigger=(100, 'iteration'),
                                  file_name='loss_grad.png',
                                  postprocess=plot_log))
    # trainer.extend(extensions.ParameterStatistics([seed,decoder])) ## very slow
    trainer.extend(CommandsExtension())
    # snapinterval <= 0 means "snapshot only once, at the very end".
    if args.snapinterval <= 0:
        args.snapinterval = total_iter
    if args.lambda_nn > 0:
        trainer.extend(
            extensions.dump_graph('main/loss_nn', out_name='gen.dot'))
    elif args.lambda_ae1 > 0:
        trainer.extend(
            extensions.dump_graph('main/loss_ae1', out_name='gen.dot'))

    # save models
    if args.use_enc:
        trainer.extend(extensions.snapshot_object(
            encoder, 'enc_{.updater.iteration}.npz'),
                       trigger=(args.snapinterval, 'iteration'))
        trainer.extend(extensions.snapshot_object(
            optimizer_enc, 'opt_enc_{.updater.iteration}.npz'),
                       trigger=(args.snapinterval, 'iteration'))
    if args.use_dis:
        trainer.extend(extensions.snapshot_object(
            dis, 'dis_{.updater.iteration}.npz'),
                       trigger=(args.snapinterval, 'iteration'))
        trainer.extend(extensions.snapshot_object(
            optimizer_dis, 'opt_dis_{.updater.iteration}.npz'),
                       trigger=(args.snapinterval, 'iteration'))
        # trainer.extend(extensions.dump_graph('main/loss_real', out_name='dis.dot'))
    trainer.extend(extensions.snapshot_object(decoder,
                                              'dec_{.updater.iteration}.npz'),
                   trigger=(args.snapinterval, 'iteration'))
    trainer.extend(extensions.snapshot_object(
        optimizer_dec, 'opt_dec_{.updater.iteration}.npz'),
                   trigger=(args.snapinterval, 'iteration'))

    # save command line arguments
    os.makedirs(args.out, exist_ok=True)
    save_args(args, args.out)
    with open(os.path.join(args.out, "args.txt"), 'w') as fh:
        fh.write(" ".join(sys.argv))

    trainer.run()
def train(dataset_train,
          dataset_test,
          gpu,
          batch_size,
          skip=False,
          intermediats=9,
          suffix='',
          niter=100,
          args=None,
          comment='',
          task='logs'):
    """Train a CycleGAN (two generators G_A/G_B, two discriminators D_A/D_B).

    Trains for ``niter`` epochs at constant LR followed by ``niter`` epochs of
    decay, saving per-epoch snapshots and loss plots under
    ``task/<comment>_<timestamp><suffix>``.

    Note: ``intermediats`` (sic) is kept as-is because it is part of the
    public signature; it is forwarded as the generators' ``intermediates``.
    ``args`` is only forwarded to ``save_args`` for experiment logging.
    """
    np.random.seed(0)
    if gpu >= 0:
        chainer.cuda.get_device_from_id(gpu).use()
        cp.random.seed(0)

    # Model
    G_A = ResnetSkipGenerator(skip=skip, intermediates=intermediats)
    G_B = ResnetSkipGenerator(skip=skip, intermediates=intermediats)
    D_A = NLayerDiscriminator()
    D_B = NLayerDiscriminator()
    if gpu >= 0:
        G_A.to_gpu()
        G_B.to_gpu()
        D_A.to_gpu()
        D_B.to_gpu()

    # Optimizer — standard CycleGAN Adam hyper-parameters.
    lr = 0.0002
    beta1 = 0.5
    beta2 = 0.999
    optimizer_G_A = O.Adam(alpha=lr, beta1=beta1, beta2=beta2)
    optimizer_G_B = O.Adam(alpha=lr, beta1=beta1, beta2=beta2)
    optimizer_D_A = O.Adam(alpha=lr, beta1=beta1, beta2=beta2)
    optimizer_D_B = O.Adam(alpha=lr, beta1=beta1, beta2=beta2)
    optimizer_G_A.setup(G_A)
    optimizer_G_B.setup(G_B)
    optimizer_D_A.setup(D_A)
    optimizer_D_B.setup(D_B)

    # Dataset
    iter_train = chainer.iterators.SerialIterator(dataset_train,
                                                  batch_size=batch_size)
    iter_test = chainer.iterators.SerialIterator(dataset_test,
                                                 batch_size=batch_size,
                                                 repeat=False,
                                                 shuffle=False)

    # Updater
    epoch_count = 1
    niter = niter  # no-op self-assignment, kept as-is
    niter_decay = niter  # decay phase lasts as long as the constant phase
    updater = CycleGANUpdater(
        iterator=iter_train,
        optimizer=dict(
            G_A=optimizer_G_A,
            G_B=optimizer_G_B,
            D_A=optimizer_D_A,
            D_B=optimizer_D_B,
        ),
        device=gpu,
    )

    # Trainer — output directory is timestamped so runs never collide.
    directory = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
    directory = comment + '_' + directory
    out = osp.join(task, directory)
    out += suffix
    trainer = training.Trainer(updater, (niter + niter_decay, 'epoch'),
                               out=out)
    save_args(args, out)
    trainer.extend(extensions.snapshot_object(
        target=G_A, filename='G_A_{.updater.epoch:08}.npz'),
                   trigger=(1, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        target=G_B, filename='G_B_{.updater.epoch:08}.npz'),
                   trigger=(1, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        target=D_A, filename='D_A_{.updater.epoch:08}.npz'),
                   trigger=(1, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        target=D_B, filename='D_B_{.updater.epoch:08}.npz'),
                   trigger=(1, 'epoch'))
    log_interval = (100, 'iteration')
    trainer.extend(extensions.LogReport(trigger=log_interval))
    assert extensions.PlotReport.available()
    trainer.extend(
        extensions.PlotReport(y_keys=['loss_gen_A', 'loss_gen_B'],
                              x_key='iteration',
                              file_name='loss_gen.png',
                              trigger=log_interval))
    trainer.extend(
        extensions.PlotReport(y_keys=['loss_dis_A', 'loss_dis_B'],
                              x_key='iteration',
                              file_name='loss_dis.png',
                              trigger=log_interval))
    trainer.extend(
        extensions.PlotReport(y_keys=['loss_cyc_A', 'loss_cyc_B'],
                              x_key='iteration',
                              file_name='loss_cyc.png',
                              trigger=log_interval))
    trainer.extend(
        extensions.PlotReport(y_keys=['loss_idt_A', 'loss_idt_B'],
                              x_key='iteration',
                              file_name='loss_idt.png',
                              trigger=log_interval))
    trainer.extend(
        extensions.PrintReport([
            'epoch',
            'iteration',
            'elapsed_time',
            'loss_gen_A',
            'loss_gen_B',
            'loss_dis_A',
            'loss_dis_B',
            'loss_cyc_A',
            'loss_cyc_B',
            'loss_idt_A',
            'loss_idt_B',
        ]))
    # NOTE(review): with batch_size > 20 this update_interval becomes 0 —
    # confirm batch sizes used in practice stay <= 20.
    trainer.extend(extensions.ProgressBar(update_interval=20 // batch_size))
    trainer.extend(CycleGANEvaluator(iter_test, device=gpu))

    @training.make_extension(trigger=(1, 'epoch'))
    def tune_learning_rate(trainer):
        # Linear-decay factor for the current epoch (1.0 during the constant
        # phase, shrinking toward 0 during the decay phase).
        epoch = trainer.updater.epoch
        lr_rate = 1.0 - (max(0, epoch + 1 + epoch_count - niter) /
                         float(niter_decay + 1))
        # NOTE(review): the factor is applied multiplicatively every epoch,
        # so decay compounds rather than setting alpha = base * lr_rate as in
        # the reference CycleGAN schedule — confirm this is intended.
        trainer.updater.get_optimizer('G_A').alpha *= lr_rate
        trainer.updater.get_optimizer('G_B').alpha *= lr_rate
        trainer.updater.get_optimizer('D_A').alpha *= lr_rate
        trainer.updater.get_optimizer('D_B').alpha *= lr_rate

    trainer.extend(tune_learning_rate)
    trainer.run()
def main():
    """Entry point: unpaired CT domain translation with paired encoders,
    decoders and discriminators (enc_x/dec_x/enc_y/dec_y/dis_x/dis_y/dis_z).

    Builds datasets for trainA/trainB/testA/testB, sets up per-model
    optimizers, a multi-iterator Updater and a Trainer with snapshots,
    LR decay, plotting, visualisation and script archiving. Requires CUDA.
    """
    args = arguments()
    out = os.path.join(args.out, dt.now().strftime('%m%d_%H%M'))
    print(args)
    print("\nresults are saved under: ", out)
    save_args(args, out)
    # Dataset implementation is chosen by image type (DICOM vs JPEG).
    if args.imgtype == "dcm":
        from dataset_dicom import Dataset as Dataset
    else:
        from dataset_jpg import DatasetOutMem as Dataset

    # CUDA
    if not chainer.cuda.available:
        print("CUDA required")
        exit()
    if len(args.gpu) == 1 and args.gpu[0] >= 0:
        chainer.cuda.get_device_from_id(args.gpu[0]).use()
        # cuda.cupy.cuda.set_allocator(cuda.cupy.cuda.MemoryPool().malloc)

    # Enable autotuner of cuDNN
    chainer.config.autotune = True
    chainer.config.dtype = dtypes[args.dtype]
    chainer.print_runtime_info()
    # Turn off type check
    # chainer.config.type_check = False
    # print('Chainer version: ', chainer.__version__)
    # print('GPU availability:', chainer.cuda.available)
    # print('cuDNN availablility:', chainer.cuda.cudnn_enabled)

    ## dataset iterator
    print("Setting up data iterators...")
    # forceSpacing is only applied on the B side (resampling to A's spacing
    # — presumably; confirm against the Dataset implementation).
    train_A_dataset = Dataset(path=os.path.join(args.root, 'trainA'),
                              args=args,
                              random=args.random_translate,
                              forceSpacing=0)
    train_B_dataset = Dataset(path=os.path.join(args.root, 'trainB'),
                              args=args,
                              random=args.random_translate,
                              forceSpacing=args.forceSpacing)
    test_A_dataset = Dataset(path=os.path.join(args.root, 'testA'),
                             args=args,
                             random=0,
                             forceSpacing=0)
    test_B_dataset = Dataset(path=os.path.join(args.root, 'testB'),
                             args=args,
                             random=0,
                             forceSpacing=args.forceSpacing)
    # Channel counts are read back from the data and stored on args so the
    # networks can size themselves.
    args.ch = train_A_dataset.ch
    args.out_ch = train_B_dataset.ch
    print("channels in A {}, channels in B {}".format(args.ch, args.out_ch))
    test_A_iter = chainer.iterators.SerialIterator(test_A_dataset,
                                                   args.nvis_A,
                                                   shuffle=False)
    test_B_iter = chainer.iterators.SerialIterator(test_B_dataset,
                                                   args.nvis_B,
                                                   shuffle=False)
    if args.batch_size > 1:
        train_A_iter = chainer.iterators.MultiprocessIterator(train_A_dataset,
                                                              args.batch_size,
                                                              n_processes=3)
        train_B_iter = chainer.iterators.MultiprocessIterator(train_B_dataset,
                                                              args.batch_size,
                                                              n_processes=3)
    else:
        train_A_iter = chainer.iterators.SerialIterator(
            train_A_dataset, args.batch_size)
        train_B_iter = chainer.iterators.SerialIterator(
            train_B_dataset, args.batch_size)

    # setup models
    enc_x = net.Encoder(args)
    # single_encoder shares one encoder between both domains.
    enc_y = enc_x if args.single_encoder else net.Encoder(args)
    dec_x = net.Decoder(args)
    dec_y = net.Decoder(args)
    dis_x = net.Discriminator(args)
    dis_y = net.Discriminator(args)
    # dis_z (latent discriminator) is replaced by a dummy link when unused.
    dis_z = net.Discriminator(
        args) if args.lambda_dis_z > 0 else chainer.links.Linear(1, 1)
    models = {
        'enc_x': enc_x,
        'dec_x': dec_x,
        'enc_y': enc_y,
        'dec_y': dec_y,
        'dis_x': dis_x,
        'dis_y': dis_y,
        'dis_z': dis_z
    }

    ## load learnt models
    if args.load_models:
        # Sibling weight files are derived by substituting the model key into
        # the given 'enc_x' filename; missing files are skipped best-effort.
        for e in models:
            m = args.load_models.replace('enc_x', e)
            try:
                serializers.load_npz(m, models[e])
                print('model loaded: {}'.format(m))
            except:
                print("couldn't load {}".format(m))
                pass

    # select GPU
    if len(args.gpu) == 1:
        for e in models:
            models[e].to_gpu()
        print('using gpu {}, cuDNN {}'.format(args.gpu,
                                              chainer.cuda.cudnn_enabled))
    else:
        print("mandatory GPU use: currently only a single GPU can be used")
        exit()

    # Setup optimisers
    def make_optimizer(model, lr, opttype='Adam'):
        """Build an optimizer of the given type with optional weight decay
        (L2 or Lasso depending on args.weight_decay_norm)."""
        # eps = 1e-5 if args.dtype==np.float16 else 1e-8
        optimizer = optim[opttype](lr)
        #from profiled_optimizer import create_marked_profile_optimizer
        # optimizer = create_marked_profile_optimizer(optim[opttype](lr), sync=True, sync_level=2)
        if args.weight_decay > 0:
            if opttype in ['Adam', 'AdaBound', 'Eve']:
                optimizer.weight_decay_rate = args.weight_decay
            else:
                # NOTE(review): add_hook is called before setup() here, the
                # reverse of the ordering used elsewhere in this file —
                # confirm Chainer accepts hooks on an un-setup optimizer.
                if args.weight_decay_norm == 'l2':
                    optimizer.add_hook(
                        chainer.optimizer.WeightDecay(args.weight_decay))
                else:
                    optimizer.add_hook(
                        chainer.optimizer_hooks.Lasso(args.weight_decay))
        optimizer.setup(model)
        return optimizer

    opt_enc_x = make_optimizer(enc_x, args.learning_rate_g, args.optimizer)
    opt_dec_x = make_optimizer(dec_x, args.learning_rate_g, args.optimizer)
    opt_enc_y = make_optimizer(enc_y, args.learning_rate_g, args.optimizer)
    opt_dec_y = make_optimizer(dec_y, args.learning_rate_g, args.optimizer)
    opt_x = make_optimizer(dis_x, args.learning_rate_d, args.optimizer)
    opt_y = make_optimizer(dis_y, args.learning_rate_d, args.optimizer)
    opt_z = make_optimizer(dis_z, args.learning_rate_d, args.optimizer)
    optimizers = {
        'opt_enc_x': opt_enc_x,
        'opt_dec_x': opt_dec_x,
        'opt_enc_y': opt_enc_y,
        'opt_dec_y': opt_dec_y,
        'opt_x': opt_x,
        'opt_y': opt_y,
        'opt_z': opt_z
    }
    if args.load_optimizer:
        for e in optimizers:
            # NOTE(review): if args.load_models is falsy, .replace raises and
            # `m` is unbound inside the except clause — confirm
            # --load_optimizer is always used together with --load_models.
            try:
                m = args.load_models.replace('enc_x', e)
                serializers.load_npz(m, optimizers[e])
                print('optimiser loaded: {}'.format(m))
            except:
                print("couldn't load {}".format(m))
                pass

    # Set up an updater: TODO: multi gpu updater
    print("Preparing updater...")
    updater = Updater(
        models=(enc_x, dec_x, enc_y, dec_y, dis_x, dis_y, dis_z),
        iterator={
            'main': train_A_iter,
            'train_B': train_B_iter,
        },
        optimizer=optimizers,
        # converter=convert.ConcatWithAsyncTransfer(),
        device=args.gpu[0],
        params={'args': args})
    # Negative snapinterval defaults to "snapshot once at the end".
    if args.snapinterval < 0:
        args.snapinterval = args.lrdecay_start + args.lrdecay_period
    log_interval = (200, 'iteration')
    model_save_interval = (args.snapinterval, 'epoch')
    plot_interval = (500, 'iteration')

    # Set up a trainer
    print("Preparing trainer...")
    if args.iteration:
        stop_trigger = (args.iteration, 'iteration')
    else:
        stop_trigger = (args.lrdecay_start + args.lrdecay_period, 'epoch')
    trainer = training.Trainer(updater, stop_trigger, out=out)
    for e in models:
        trainer.extend(extensions.snapshot_object(models[e],
                                                  e + '{.updater.epoch}.npz'),
                       trigger=model_save_interval)
        # trainer.extend(extensions.ParameterStatistics(models[e])) ## very slow
    for e in optimizers:
        trainer.extend(extensions.snapshot_object(optimizers[e],
                                                  e + '{.updater.epoch}.npz'),
                       trigger=model_save_interval)

    # Reported keys: always-on, plus keys gated by their loss weights.
    log_keys = ['epoch', 'iteration', 'lr']
    log_keys_cycle = [
        'opt_enc_x/loss_cycle', 'opt_enc_y/loss_cycle', 'opt_dec_x/loss_cycle',
        'opt_dec_y/loss_cycle', 'myval/cycle_x_l1', 'myval/cycle_y_l1'
    ]
    log_keys_d = [
        'opt_x/loss_real', 'opt_x/loss_fake', 'opt_y/loss_real',
        'opt_y/loss_fake', 'opt_z/loss_x', 'opt_z/loss_y'
    ]
    log_keys_adv = [
        'opt_enc_y/loss_adv', 'opt_dec_y/loss_adv', 'opt_enc_x/loss_adv',
        'opt_dec_x/loss_adv'
    ]
    log_keys.extend(
        ['opt_enc_x/loss_reg', 'opt_enc_y/loss_reg', 'opt_dec_y/loss_tv'])
    if args.lambda_air > 0:
        log_keys.extend(['opt_dec_x/loss_air', 'opt_dec_y/loss_air'])
    if args.lambda_grad > 0:
        log_keys.extend(['opt_dec_x/loss_grad', 'opt_dec_y/loss_grad'])
    if args.lambda_identity_x > 0:
        log_keys.extend(['opt_dec_x/loss_id', 'opt_dec_y/loss_id'])
    if args.dis_reg_weighting > 0:
        log_keys_d.extend(
            ['opt_x/loss_reg', 'opt_y/loss_reg', 'opt_z/loss_reg'])
    if args.dis_wgan:
        log_keys_d.extend(['opt_x/loss_gp', 'opt_y/loss_gp', 'opt_z/loss_gp'])
    log_keys_all = log_keys + log_keys_d + log_keys_adv + log_keys_cycle
    trainer.extend(
        extensions.LogReport(keys=log_keys_all, trigger=log_interval))
    trainer.extend(extensions.PrintReport(log_keys_all), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=20))
    trainer.extend(extensions.observe_lr(optimizer_name='opt_enc_x'),
                   trigger=log_interval)

    # learning rate scheduling
    # LinearShift works in iterations, so epoch bounds are converted using
    # the dataset length (one pass over train_A per epoch).
    decay_start_iter = len(train_A_dataset) * args.lrdecay_start
    decay_end_iter = len(train_A_dataset) * (args.lrdecay_start +
                                             args.lrdecay_period)
    for e in [opt_enc_x, opt_enc_y, opt_dec_x, opt_dec_y]:
        trainer.extend(
            extensions.LinearShift('alpha', (args.learning_rate_g, 0),
                                   (decay_start_iter, decay_end_iter),
                                   optimizer=e))
    for e in [opt_x, opt_y, opt_z]:
        trainer.extend(
            extensions.LinearShift('alpha', (args.learning_rate_d, 0),
                                   (decay_start_iter, decay_end_iter),
                                   optimizer=e))

    ## dump graph
    if args.report_start < 1:
        if args.lambda_tv > 0:
            trainer.extend(
                extensions.dump_graph('opt_dec_y/loss_tv',
                                      out_name='dec.dot'))
        if args.lambda_reg > 0:
            trainer.extend(
                extensions.dump_graph('opt_enc_x/loss_reg',
                                      out_name='enc.dot'))
        trainer.extend(
            extensions.dump_graph('opt_x/loss_fake', out_name='dis.dot'))

    # ChainerUI
    # trainer.extend(CommandsExtension())
    if extensions.PlotReport.available():
        # log_keys[3:] skips epoch/iteration/lr, plotting only loss terms.
        trainer.extend(
            extensions.PlotReport(log_keys[3:],
                                  'iteration',
                                  trigger=plot_interval,
                                  file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(log_keys_d,
                                  'iteration',
                                  trigger=plot_interval,
                                  file_name='loss_d.png'))
        trainer.extend(
            extensions.PlotReport(log_keys_adv,
                                  'iteration',
                                  trigger=plot_interval,
                                  file_name='loss_adv.png'))
        trainer.extend(
            extensions.PlotReport(log_keys_cycle,
                                  'iteration',
                                  trigger=plot_interval,
                                  file_name='loss_cyc.png'))

    ## visualisation
    vis_folder = os.path.join(out, "vis")
    os.makedirs(vis_folder, exist_ok=True)
    if not args.vis_freq:
        args.vis_freq = len(train_A_dataset) // 2
    # Slice indices to visualise for volumetric (DICOM) data only.
    s = [k for k in range(args.num_slices)
         ] if args.num_slices > 0 and args.imgtype == "dcm" else None
    trainer.extend(VisEvaluator({
        "testA": test_A_iter,
        "testB": test_B_iter
    }, {
        "enc_x": enc_x,
        "enc_y": enc_y,
        "dec_x": dec_x,
        "dec_y": dec_y
    },
                                params={
                                    'vis_out': vis_folder,
                                    'slice': s
                                },
                                device=args.gpu[0]),
                   trigger=(args.vis_freq, 'iteration'))

    ## output filenames of training dataset
    with open(os.path.join(out, 'trainA.txt'), 'w') as output:
        for f in train_A_dataset.names:
            output.writelines("\n".join(f))
            output.writelines("\n")
    with open(os.path.join(out, 'trainB.txt'), 'w') as output:
        for f in train_B_dataset.names:
            output.writelines("\n".join(f))
            output.writelines("\n")

    # archive the scripts — snapshot the exact code used for this run so
    # results stay reproducible.
    rundir = os.path.dirname(os.path.realpath(__file__))
    import zipfile
    with zipfile.ZipFile(os.path.join(out, 'script.zip'),
                         'w',
                         compression=zipfile.ZIP_DEFLATED) as new_zip:
        for f in [
                'train.py', 'net.py', 'updater.py', 'consts.py', 'losses.py',
                'arguments.py', 'convert.py'
        ]:
            new_zip.write(os.path.join(rundir, f), arcname=f)

    # Run the training
    trainer.run()
def main():
    '''
    main function, start point

    Trains the AEFINet2 frame-interpolation model on UCF101-derived
    sequence datasets and saves the final model plus its hyper-parameters.
    (Runtime strings below are kept byte-identical, including typos.)
    '''
    # argument handling
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate',
                        '-l',
                        type=float,
                        default=0.001,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU1 ID (negative value indicates CPU)')
    parser.add_argument('--gpu1',
                        '-G',
                        type=int,
                        default=2,
                        help='GPU2 ID (negative value indicates CPU)')
    parser.add_argument('--resume',
                        '-r',
                        default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--iter_parallel',
                        '-p',
                        action='store_true',
                        default=False,
                        help='loading dataset from disk')
    parser.add_argument('--test',
                        action='store_true',
                        default=False,
                        help='Test Mode, a few dataset')
    parser.add_argument('--opt',
                        '-o',
                        type=str,
                        choices=('adam', 'sgd'),
                        default='adam')
    parser.add_argument('--fsize', '-f', type=int, default=5)
    parser.add_argument('--ch', '-c', type=int, default=4)
    args = parser.parse_args()

    # print parameters
    print("-=Learning Parameter=-")
    print("# Max Epochs: {}".format(args.epoch))
    print("# Batch Size: {}".format(args.batchsize))
    print("# Learning Rate: {}".format(args.learnrate))
    print("# Optimizer Method: {}".format(args.opt))
    print("# Filter Size: {}".format(args.fsize))
    print("# Channel Scale: {}".format(args.ch))
    print('# Train Dataet: General 100')
    if args.iter_parallel:
        print("# Data Iters that loads in Parallel")
    print("\n")

    # save directory
    model_dir_name = 'AEFINet2_opt_{}_ch_{}_fsize_{}'.format(
        args.opt, args.ch, args.fsize)
    outdir = path.join(ROOT_PATH, 'results', 'FI', 'AEFINet', model_dir_name)
    if not path.exists(outdir):
        os.makedirs(outdir)
    # Dump all parsed arguments as key:value lines for the record.
    with open(path.join(outdir, 'arg_param.txt'), 'w') as f:
        for k, v in args.__dict__.items():
            f.write('{}:{}\n'.format(k, v))

    # loading dataset — test mode uses a tiny dataset for both splits.
    if args.test:
        print(
            '# loading test dataet(UCF101_minimam_test_size64_frame3_group2_max4_p) ...'
        )
        train_dataset = 'UCF101_minimam_test_size64_frame3_group2_max4_p'
        test_dataset = 'UCF101_minimam_test_size64_frame3_group2_max4_p'
    else:
        print(
            '# loading test dataet(UCF101_train_size64_frame3_group10_max100_p, UCF101_test_size64_frame3_group25_max5_p) ...'
        )
        train_dataset = 'UCF101_train_size64_frame3_group10_max100_p'
        test_dataset = 'UCF101_test_size64_frame3_group25_max5_p'
    # On-disk (parallel) vs in-memory dataset variants.
    if args.iter_parallel:
        train = ds.SequenceDataset(dataset=train_dataset)
        test = ds.SequenceDataset(dataset=test_dataset)
    else:
        train = ds.SequenceDatasetOnMem(dataset=train_dataset)
        test = ds.SequenceDatasetOnMem(dataset=test_dataset)

    # prepare model
    # chainer.cuda.get_device_from_id(args.gpu).use()
    model = N.AEFINet2(vgg_path=path.join(ROOT_PATH, 'models', 'VGG16.npz'),
                       f_size=args.fsize,
                       n_ch=args.ch,
                       size=64)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # setup optimizer (argparse `choices` guarantees one branch is taken)
    if args.opt == 'adam':
        optimizer = chainer.optimizers.Adam(alpha=args.learnrate)
    elif args.opt == 'sgd':
        optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate,
                                                   momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    # setup iter
    if args.iter_parallel:
        train_iter = chainer.iterators.MultiprocessIterator(train,
                                                            args.batchsize,
                                                            n_processes=8)
        test_iter = chainer.iterators.MultiprocessIterator(test,
                                                           args.batchsize,
                                                           repeat=False,
                                                           shuffle=False,
                                                           n_processes=8)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(test,
                                                     args.batchsize,
                                                     repeat=False,
                                                     shuffle=False)

    # setup trainer
    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       device=args.gpu,
                                       loss_func=model.get_loss_func())
    # updater = training.ParallelUpdater(
    #     train_iter,
    #     optimizer,
    #     devices={'main': args.gpu, 'second': args.gpu1},
    #     loss_func=model.get_loss_func(),
    # )
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)

    # eval test data
    trainer.extend(
        extensions.Evaluator(test_iter,
                             model,
                             device=args.gpu,
                             eval_func=model.get_loss_func()))
    # dump loss graph
    trainer.extend(extensions.dump_graph('main/loss'))
    # lr shift — decay the relevant hyper-parameter by 10x every 50 epochs.
    if args.opt == 'sgd':
        trainer.extend(extensions.ExponentialShift("lr", 0.1),
                       trigger=(50, 'epoch'))
    elif args.opt == 'adam':
        trainer.extend(extensions.ExponentialShift("alpha", 0.1),
                       trigger=(50, 'epoch'))
    # save snapshot
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_snapshot_{.updater.epoch}'),
                   trigger=(10, 'epoch'))
    # log report
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))
    # plot loss graph
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              'epoch',
                              file_name='loss.png'))
    # plot acc graph
    trainer.extend(
        extensions.PlotReport(['main/psnr', 'validation/main/psnr'],
                              'epoch',
                              file_name='PSNR.png'))
    # print info
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/psnr',
            'validation/main/psnr', 'lr', 'elapsed_time'
        ]))
    # print progbar
    trainer.extend(extensions.ProgressBar())
    # [ChainerUI] enable to send commands from ChainerUI
    trainer.extend(CommandsExtension())
    # [ChainerUI] save 'args' to show experimental conditions
    save_args(args, outdir)

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # save final model
    model_outdir = path.join(ROOT_PATH, 'models', model_dir_name)
    if not path.exists(model_outdir):
        os.makedirs(model_outdir)
    model_name = 'AEFINet2_{}_ch_{}_fsize_{}.npz'.format(
        args.opt, args.ch, args.fsize)
    chainer.serializers.save_npz(path.join(model_outdir, model_name), model)
    # NOTE(review): the recorded name 'AEFINetConcat' differs from the
    # trained class N.AEFINet2 — confirm which identifier loaders expect.
    model_parameter = {
        'name': 'AEFINetConcat',
        'parameter': {
            'f_size': args.fsize,
            'ch': args.ch
        }
    }
    with open(path.join(model_outdir, 'model_parameter.json'), 'w') as f:
        json.dump(model_parameter, f)
# Trainer assembly for a predictor-training script: periodic evaluation on
# test and train-eval iterators, graph dump, iteration-numbered snapshots of
# the predictor, logging, and argument archival via chainerui's save_args.
# NOTE(review): `updater`, `config`, `test_iter`, `train_eval_iter`,
# `converter`, `models`, `predictor` and `arguments` are defined earlier in
# the originating script (not visible here).
trigger_log = (config.train.log_iteration, 'iteration')
trigger_snapshot = (config.train.snapshot_iteration, 'iteration')

# No stop trigger: the trainer runs until externally interrupted.
trainer = training.Trainer(updater, out=arguments.output)

ext = extensions.Evaluator(test_iter,
                           models,
                           converter,
                           device=config.train.gpu,
                           eval_func=updater.forward)
trainer.extend(ext, name='test', trigger=trigger_log)
ext = extensions.Evaluator(train_eval_iter,
                           models,
                           converter,
                           device=config.train.gpu,
                           eval_func=updater.forward)
trainer.extend(ext, name='train', trigger=trigger_log)
trainer.extend(extensions.dump_graph('predictor/loss'))
ext = extensions.snapshot_object(
    predictor, filename='predictor_{.updater.iteration}.npz')
trainer.extend(ext, trigger=trigger_snapshot)
trainer.extend(extensions.LogReport(trigger=trigger_log))
trainer.extend(extensions.PrintReport(['predictor/loss']))
save_args(arguments, arguments.output)
# trainer.extend(extensions.ProgressBar())

trainer.run()
def run_training(config: str, device: int, seed: int):
    """Train a BiLSTM-CRF named-entity tagger.

    Args:
        config: path to the YAML/ini configuration file (parsed by
            ``ConfigParser.parse``).
        device: GPU id, or a negative value for CPU.
        seed: random seed forwarded to ``set_seed``.

    Side effects: creates a timestamped output directory, writes logs,
    per-epoch snapshots and the exported parameters (via ``save_args``),
    then runs the training loop to completion.
    """
    configs = ConfigParser.parse(config)
    # Fix: the former ``params = yaml.load(open(config, encoding="utf-8"))``
    # was removed — it leaked the file handle, used the unsafe no-Loader
    # form of yaml.load, and its result was dead code (``params`` is
    # unconditionally rebuilt from ``configs.export()`` below).

    if device >= 0:
        cuda.get_device(device).use()
    set_seed(seed, device)

    vocab = Vocabulary.prepare(configs)
    # Vocabulary sizes are the largest index + 1 in each mapping.
    num_word_vocab = max(vocab.dictionaries["word2idx"].values()) + 1
    num_char_vocab = max(vocab.dictionaries["char2idx"].values()) + 1
    num_tag_vocab = max(vocab.dictionaries["tag2idx"].values()) + 1

    model = BiLSTM_CRF(configs, num_word_vocab, num_char_vocab, num_tag_vocab)

    transformer = DatasetTransformer(vocab)
    transform = transformer.transform

    external_configs = configs["external"]
    if "word_vector" in external_configs:
        # Inject pre-trained word vectors; dimensions must match the model.
        syn0 = model.embed_word.W.data
        _, word_dim = syn0.shape
        pre_word_dim = vocab.gensim_model.vector_size
        if word_dim != pre_word_dim:
            msg = "Mismatch vector size between model and pre-trained word vectors"  # NOQA
            msg += f"(model: \x1b[31m{word_dim}\x1b[0m"
            msg += f", pre-trained word vector: \x1b[31m{pre_word_dim}\x1b[0m"
            raise Exception(msg)
        word2idx = vocab.dictionaries["word2idx"]
        syn0 = prepare_pretrained_word_vector(word2idx, vocab.gensim_model,
                                              syn0, num_word_vocab)
        model.set_pretrained_word_vectors(syn0)

    train_iterator = create_iterator(vocab, configs, "train", transform)
    valid_iterator = create_iterator(vocab, configs, "valid", transform)
    test_iterator = create_iterator(vocab, configs, "test", transform)

    if device >= 0:
        model.to_gpu(device)

    optimizer = create_optimizer(configs)
    optimizer.setup(model)
    optimizer = add_hooks(optimizer, configs)

    updater = T.StandardUpdater(train_iterator,
                                optimizer,
                                converter=converter,
                                device=device)

    # Export the effective configuration, augmented with the derived
    # vocabulary sizes, for save_args below.
    params = configs.export()
    params["num_word_vocab"] = num_word_vocab
    params["num_char_vocab"] = num_char_vocab
    params["num_tag_vocab"] = num_tag_vocab

    epoch = configs["iteration"]["epoch"]
    trigger = (epoch, "epoch")

    # Timestamped output directory so repeated runs never overwrite.
    model_path = configs["output"]
    timestamp = datetime.datetime.now()
    timestamp_str = timestamp.isoformat()
    output_path = Path(f"{model_path}.{timestamp_str}")
    trainer = T.Trainer(updater, trigger, out=output_path)
    save_args(params, output_path)
    msg = f"Create \x1b[31m{output_path}\x1b[0m for saving model snapshots"
    logging.debug(msg)

    entries = ["epoch", "iteration", "elapsed_time", "lr", "main/loss"]
    entries += ["validation/main/loss", "validation/main/fscore"]
    entries += ["validation_1/main/loss", "validation_1/main/fscore"]

    valid_evaluator = NamedEntityEvaluator(valid_iterator,
                                           model,
                                           transformer.itransform,
                                           converter,
                                           device=device)
    test_evaluator = NamedEntityEvaluator(test_iterator,
                                          model,
                                          transformer.itransform,
                                          converter,
                                          device=device)

    epoch_trigger = (1, "epoch")
    snapshot_filename = "snapshot_epoch_{.updater.epoch:04d}"
    trainer.extend(valid_evaluator, trigger=epoch_trigger)
    trainer.extend(test_evaluator, trigger=epoch_trigger)
    trainer.extend(E.observe_lr(), trigger=epoch_trigger)
    trainer.extend(E.LogReport(trigger=epoch_trigger))
    trainer.extend(E.PrintReport(entries=entries), trigger=epoch_trigger)
    trainer.extend(E.ProgressBar(update_interval=20))
    trainer.extend(E.snapshot_object(model, filename=snapshot_filename),
                   trigger=(1, "epoch"))

    if "learning_rate_decay" in params:
        # NOTE(review): `logger` is referenced here while `logging` is used
        # above — confirm a module-level `logger` exists.
        logger.debug("Enable Learning Rate decay")
        trainer.extend(
            LearningRateDecay("lr", params["learning_rate"],
                              params["learning_rate_decay"]),
            trigger=epoch_trigger,
        )

    trainer.run()
loss_config=config.loss, predictor=predictor, discriminator=discriminator, device=config.train.gpu, iterator=train_iter, optimizer=opts, converter=converter, ) # trainer trigger_log = (config.train.log_iteration, 'iteration') trigger_snapshot = (config.train.snapshot_iteration, 'iteration') trainer = training.Trainer(updater, out=arguments.output) ext = extensions.Evaluator(test_iter, models, converter, device=config.train.gpu, eval_func=updater.forward) trainer.extend(ext, name='test', trigger=trigger_log) ext = extensions.Evaluator(train_eval_iter, models, converter, device=config.train.gpu, eval_func=updater.forward) trainer.extend(ext, name='train', trigger=trigger_log) trainer.extend(extensions.dump_graph('predictor/loss')) ext = extensions.snapshot_object(predictor, filename='predictor_{.updater.iteration}.npz') trainer.extend(ext, trigger=trigger_snapshot) trainer.extend(extensions.LogReport(trigger=trigger_log)) trainer.extend(extensions.PrintReport(['predictor/loss'])) save_args(arguments, arguments.output) trainer.run()
def main():
    """Train the SRCNN deblurring network on blurred/sharp image pairs.

    Parses command-line options, builds the model, optimizer and paired
    datasets, configures a chainer ``Trainer`` with evaluation, LR-drop
    scheduling, logging and per-epoch snapshot extensions, then runs the
    training loop. Side effects: writes logs/snapshots under ``args.out``
    and saves the parsed arguments there via chainerui's ``save_args``.
    """
    parser = argparse.ArgumentParser(description='Train Deblur Network')
    parser.add_argument('--seed', '-s', type=int, default=0,
                        help='seed for random values')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.1,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=50,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print(args)
    print('')

    set_random_seed(args.seed)

    # MSE loss for the regression task; PSNR is reported in the "accuracy"
    # slot of the Classifier so it shows up in the standard reports.
    predictor = srcnn.create_srcnn()
    model = L.Classifier(predictor, lossfun=F.mean_squared_error, accfun=psnr)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    # Blurred/sharp image pairs listed in text files under base_dir.
    base_dir = 'data/blurred_sharp'
    train_data = pairwise_dataset.PairwiseDataset(
        blur_image_list=str(Path(base_dir).joinpath('train_blur_images.txt')),
        sharp_image_list=str(
            Path(base_dir).joinpath('train_sharp_images.txt')),
        root=base_dir)
    train_data = chainer.datasets.TransformDataset(train_data,
                                                   transform.Transform())
    test_data = pairwise_dataset.PairwiseDataset(
        blur_image_list=str(Path(base_dir).joinpath('test_blur_images.txt')),
        sharp_image_list=str(Path(base_dir).joinpath('test_sharp_images.txt')),
        root=base_dir)
    # NOTE(review): test data would not normally be transformed, but the
    # transform also fixes the resolution, so it is applied here as well.
    test_data = chainer.datasets.TransformDataset(test_data,
                                                  transform.Transform())

    train_iter = chainer.iterators.SerialIterator(train_data, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test_data, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.FailOnNonNumber())

    # Evaluate the model with the test dataset for each epoch
    eval_trigger = (1, 'epoch')
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu),
                   trigger=eval_trigger)

    # Multiply the learning rate by lr_drop_ratio (0.1) at 50% and 75% of
    # the total number of epochs.
    lr_drop_epoch = [int(args.epoch * 0.5), int(args.epoch * 0.75)]
    lr_drop_ratio = 0.1
    print('lr schedule: {}, timing: {}'.format(lr_drop_ratio, lr_drop_epoch))

    def lr_drop(trainer):
        trainer.updater.get_optimizer('main').lr *= lr_drop_ratio

    trainer.extend(lr_drop,
                   trigger=chainer.training.triggers.ManualScheduleTrigger(
                       lr_drop_epoch, 'epoch'))
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot of the predictor at each epoch.
    # BUGFIX: extensions.snapshot() takes a save *function* as its first
    # positional argument, so passing model.predictor there would fail at
    # save time. Serializing a specific link is done with snapshot_object,
    # the idiom the other training scripts in this file already use.
    trainer.extend(extensions.snapshot_object(model.predictor,
                                              'model_{.updater.epoch}.npz'),
                   trigger=(1, 'epoch'))

    # Write a log of evaluation statistics every 100 iterations
    trainer.extend(extensions.LogReport(trigger=(100, 'iteration')))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport([
        'epoch', 'lr', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy', 'elapsed_time'
    ]), trigger=(100, 'iteration'))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # interact with chainerui
    trainer.extend(CommandsExtension(), trigger=(100, 'iteration'))
    # save args
    save_args(args, args.out)

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    """Train a CIFAR classifier with optional mixup / random-erasing augmentation.

    Parses command-line options (dataset, model, augmentation strategy),
    builds the model, optimizer and augmented datasets, configures a chainer
    ``Trainer`` with evaluation, LR-drop scheduling, logging and a final
    snapshot, then runs the training loop. Side effects: writes logs and the
    snapshot under ``args.out`` and saves the parsed arguments there via
    chainerui's ``save_args``.
    """
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--seed', '-s', type=int, default=0,
                        help='seed for random values')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.1,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--aug_method', '-a', default='both',
                        choices=['none', 'mixup', 'random_erasing', 'both'],
                        help='data augmentation strategy')
    # NOTE(review): help text looks copy-pasted from --aug_method; this flag
    # actually selects the network architecture.
    parser.add_argument('--model', '-m', default='pyramid',
                        choices=['resnet50', 'pyramid'],
                        help='data augmentation strategy')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print(args)
    print('')

    set_random_seed(args.seed)

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
        # for mean-teacher experiment
        #train = train[:-10000]
        #print(len(train))
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    if args.model == 'resnet50':
        # Replace the final fully-connected layer to match the class count.
        predictor = ResNet(None)
        predictor.fc6 = L.Linear(2048, class_labels)
    elif args.model == 'pyramid':
        predictor = shaked_pyramid_net.PyramidNet(skip=True)

    # NOTE(review, translated): this overlaps with the train-data transform
    # branching further below — mixup produces soft labels, so it needs a
    # matching soft-label loss/accuracy pair.
    if args.aug_method in ('both', 'mixup'):
        lossfun = soft_label_classification_loss
        accfun = soft_label_classification_acc
    else:
        lossfun = F.softmax_cross_entropy
        accfun = F.accuracy

    model = L.Classifier(predictor, lossfun=lossfun, accfun=accfun)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    # augment train data
    if args.aug_method == 'none':
        # (message says: no data augmentation)
        print('data augmentationなしです')
        train = dataset.SingleCifar10((train, None))
    elif args.aug_method in ('both', 'mixup'):
        # 'both' adds random erasing on top of mixup; mixup needs image
        # *pairs*, hence the pairwise dataset wrapper.
        use_random_erasing = args.aug_method == 'both'
        train = dataset.PairwiseCifar10((train, None))
        train = chainer.datasets.transform_dataset.TransformDataset(
            train,
            transformer.MixupTransform(use_random_erasing=use_random_erasing))
    elif args.aug_method == 'random_erasing':
        train = dataset.SingleCifar10((train, None))
        train = chainer.datasets.transform_dataset.TransformDataset(
            train, transformer.RandomErasingTransform())

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    eval_trigger = (1, 'epoch')
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu),
                   trigger=eval_trigger)

    # Multiply the learning rate by lr_drop_ratio (0.1) at 50% and 75% of
    # the total number of epochs.
    lr_drop_epoch = [int(args.epoch * 0.5), int(args.epoch * 0.75)]
    lr_drop_ratio = 0.1
    print(f'lr schedule: {lr_drop_ratio}, timing: {lr_drop_epoch}')

    def lr_drop(trainer):
        trainer.updater.get_optimizer('main').lr *= lr_drop_ratio

    trainer.extend(lr_drop,
                   trigger=chainer.training.triggers.ManualScheduleTrigger(
                       lr_drop_epoch, 'epoch'))
    trainer.extend(extensions.observe_lr(), trigger=(1, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a single snapshot at the end of training (trigger fires at the
    # final epoch only).
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'lr', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    # interact with chainerui
    trainer.extend(CommandsExtension(), trigger=(100, 'iteration'))
    # save args
    save_args(args, args.out)

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()