def objective(trial, comm):
    """Optuna objective with pruning support, run collectively on every MPI rank.

    Builds a trial-sampled classifier, trains it with a multi-node optimizer,
    and returns the validation accuracy. All ranks must call this in lockstep
    because scatter_dataset and the multi-node optimizer/evaluator are
    collective operations.
    """
    # Sample an architecture for this trial.
    model = L.Classifier(create_model(trial))

    # Optimizer: plain MomentumSGD wrapped for multi-node gradient exchange.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)

    # Only worker 0 loads the whole dataset; it is then evenly split and
    # distributed to all workers via scatter_dataset.
    if comm.rank == 0:
        train, valid = chainer.datasets.get_mnist()
        rand = np.random.RandomState(0)
        train = chainer.datasets.SubDataset(
            train, 0, N_TRAIN_EXAMPLES, order=rand.permutation(len(train))
        )
        valid = chainer.datasets.SubDataset(
            valid, 0, N_VALID_EXAMPLES, order=rand.permutation(len(valid))
        )
    else:
        train, valid = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    valid = chainermn.scatter_dataset(valid, comm)

    train_iter = chainer.iterators.SerialIterator(train, BATCHSIZE, shuffle=True)
    valid_iter = chainer.iterators.SerialIterator(
        valid, BATCHSIZE, repeat=False, shuffle=False
    )

    # Trainer setup.
    updater = chainer.training.StandardUpdater(train_iter, optimizer)
    trainer = chainer.training.Trainer(updater, (EPOCH, "epoch"))

    # Pruning extension: checks "validation/main/accuracy" every
    # PRUNER_INTERVAL epochs and raises TrialPruned to stop bad trials early.
    trainer.extend(
        optuna.integration.ChainerPruningExtension(
            trial, "validation/main/accuracy", (PRUNER_INTERVAL, "epoch")
        )
    )
    trainer.extend(
        chainermn.create_multi_node_evaluator(
            chainer.training.extensions.Evaluator(valid_iter, model), comm
        )
    )
    log_report = chainer.training.extensions.LogReport(log_name=None)
    trainer.extend(log_report)
    if comm.rank == 0:
        # Progress output only on one rank to avoid duplicated lines.
        trainer.extend(chainer.training.extensions.ProgressBar())

    # show_loop_exception_msg=False suppresses the message trainer would
    # otherwise print each time ChainerPruningExtension raises TrialPruned.
    trainer.run(show_loop_exception_msg=False)

    # Final evaluation with a fresh multi-node evaluator.
    final_evaluator = chainermn.create_multi_node_evaluator(
        chainer.training.extensions.Evaluator(valid_iter, model), comm
    )
    report = final_evaluator()
    return report["main/accuracy"]
def get_trainer(args, comm, model, device, train_iterator, val_iterator, optimizer):
    """Assemble a ChainerMN trainer with evaluation and logging extensions.

    Display/output extensions are registered only on rank 0 so log lines are
    not repeated once per worker. Returns the configured Trainer.
    """
    updater = training.StandardUpdater(train_iterator, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # In test mode, validate/log every 10 iterations instead of every epoch.
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    log_interval = (10, 'iteration') if args.test else (1, 'epoch')

    # Evaluator, wrapped so results are aggregated across all workers.
    evaluator = TestModeEvaluator(val_iterator, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)

    if args.optimizer == 'rmsprop_warmup':
        scheduler = dlframeworks.chainer.optimizers.RMSpropWarmupScheduler(
            comm.size, args.batchsize)
        trainer.extend(scheduler)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'iteration', 'main/loss', 'validation/main/loss',
                'main/accuracy', 'validation/main/accuracy', 'lr',
            ]),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    return trainer
def main(args, model, x, t, valid_rate=0.2):
    """Train `model` on (x, t) across multiple MPI nodes with ChainerMN.

    Rank 0 splits the data into train/validation by `valid_rate`, the split
    is scattered to all workers, and rank 0 reports throughput and saves the
    trained model at the end. Requires a GPU per process (device >= 0).
    """
    print('Start a training script using multiple nodes.')
    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank
    assert device >= 0, 'invalid device ID: {}'.format(device)

    if comm.mpi_comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))
        print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    # Rank 0 builds the train/validation split; other ranks receive their
    # shard via scatter_dataset (a collective call, so all ranks reach it).
    if comm.rank == 0:
        threshold = int(len(t) * (1 - valid_rate))
        train = datasets.tuple_dataset.TupleDataset(x[0:threshold], t[0:threshold])
        valid = datasets.tuple_dataset.TupleDataset(x[threshold:], t[threshold:])
        # Total images processed over the whole run (used for throughput).
        datasize = len(train) * args.epoch
    else:
        train, valid = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    valid = chainermn.scatter_dataset(valid, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    valid_iter = chainer.iterators.SerialIterator(
        valid, args.batchsize, repeat=False, shuffle=False)

    if device >= 0:
        cuda.get_device_from_id(device).use()
        model.to_gpu()

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.SGD(lr=2e-4), comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(1e-2))

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    evaluator = extensions.Evaluator(valid_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    prepare_extensions(trainer, evaluator, args, comm)

    trainer.run()

    # Only rank 0 reports throughput and persists the trained model.
    if comm.rank == 0:
        throughput = datasize / trainer.elapsed_time
        print('Throughput: {} [images/sec.] ({} / {})'.format(
            throughput, datasize, trainer.elapsed_time))
        model_filepath = os.path.join(args.out, 'trained.model')
        chainer.serializers.save_npz(model_filepath, model)
def objective(trial, comm):
    """Optuna objective run collectively on all MPI ranks.

    Trains a trial-sampled classifier on an MNIST subset and returns the
    classification error (1 - accuracy), so lower is better.
    """
    # Sample an architecture for this trial.
    model = L.Classifier(create_model(trial))

    # MomentumSGD wrapped as a multi-node optimizer for gradient exchange.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)

    # Only worker 0 loads the whole dataset; scatter_dataset then splits it
    # evenly across all workers.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
        rand = np.random.RandomState(0)
        train = chainer.datasets.SubDataset(
            train, 0, N_TRAIN_EXAMPLES, order=rand.permutation(len(train)))
        test = chainer.datasets.SubDataset(
            test, 0, N_TEST_EXAMPLES, order=rand.permutation(len(test)))
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm)

    train_iter = chainer.iterators.SerialIterator(train, BATCHSIZE, shuffle=True)
    test_iter = chainer.iterators.SerialIterator(
        test, BATCHSIZE, repeat=False, shuffle=False)

    # Trainer setup.
    updater = chainer.training.StandardUpdater(train_iter, optimizer)
    trainer = chainer.training.Trainer(updater, (EPOCH, 'epoch'))
    if comm.rank == 0:
        # Progress output only once, not once per worker.
        trainer.extend(chainer.training.extensions.ProgressBar())

    trainer.run()

    # Final multi-node evaluation.
    evaluator = chainer.training.extensions.Evaluator(test_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    report = evaluator()

    # The following line mitigates the memory problem in CircleCI
    # (see https://github.com/pfnet/optuna/pull/325 for more details).
    gc.collect()

    return 1.0 - report['main/accuracy']
def objective(trial, comm):
    """Optuna objective run collectively on all MPI ranks.

    Trains a trial-sampled classifier on an MNIST subset and returns the
    validation accuracy (higher is better).
    """
    # Sample an architecture for this trial.
    model = L.Classifier(create_model(trial))

    # MomentumSGD wrapped for multi-node gradient exchange.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)

    # Only worker 0 loads the whole dataset; scatter_dataset then splits it
    # evenly across all workers.
    if comm.rank == 0:
        train, valid = chainer.datasets.get_mnist()
        rand = np.random.RandomState(0)
        train = chainer.datasets.SubDataset(
            train, 0, N_TRAIN_EXAMPLES, order=rand.permutation(len(train)))
        valid = chainer.datasets.SubDataset(
            valid, 0, N_VALID_EXAMPLES, order=rand.permutation(len(valid)))
    else:
        train, valid = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    valid = chainermn.scatter_dataset(valid, comm)

    train_iter = chainer.iterators.SerialIterator(train, BATCHSIZE, shuffle=True)
    valid_iter = chainer.iterators.SerialIterator(
        valid, BATCHSIZE, repeat=False, shuffle=False)

    # Trainer setup.
    updater = chainer.training.StandardUpdater(train_iter, optimizer)
    trainer = chainer.training.Trainer(updater, (EPOCH, "epoch"))
    if comm.rank == 0:
        # Progress output only once, not once per worker.
        trainer.extend(chainer.training.extensions.ProgressBar())

    trainer.run()

    # Final multi-node evaluation.
    evaluator = chainer.training.extensions.Evaluator(valid_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    report = evaluator()
    return report["main/accuracy"]
def objective(trial, comm):
    """GPU variant of the Optuna objective, run collectively on all MPI ranks.

    Each process uses the GPU matching its intra-node rank. Returns the test
    accuracy of a trial-sampled classifier trained on an MNIST subset.
    """
    # One GPU per process, selected by the intra-node rank.
    device = comm.intra_rank
    chainer.cuda.get_device_from_id(device).use()

    # Sample an architecture for this trial and move it to the GPU.
    model = L.Classifier(create_model(trial))
    model.to_gpu()

    # MomentumSGD wrapped for multi-node gradient exchange.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)

    # Only worker 0 loads the whole dataset; scatter_dataset then splits it
    # evenly across all workers.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
        rand = np.random.RandomState(0)
        train = chainer.datasets.SubDataset(
            train, 0, N_TRAIN_EXAMPLES, order=rand.permutation(len(train)))
        test = chainer.datasets.SubDataset(
            test, 0, N_TEST_EXAMPLES, order=rand.permutation(len(test)))
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm)

    train_iter = chainer.iterators.SerialIterator(train, BATCHSIZE, shuffle=True)
    test_iter = chainer.iterators.SerialIterator(
        test, BATCHSIZE, repeat=False, shuffle=False)

    # Trainer setup (updater feeds batches to the per-process GPU).
    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = chainer.training.Trainer(updater, (EPOCH, 'epoch'))
    if comm.rank == 0:
        # Progress output only once, not once per worker.
        trainer.extend(chainer.training.extensions.ProgressBar())

    trainer.run()

    # Final multi-node evaluation.
    evaluator = chainer.training.extensions.Evaluator(
        test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    report = evaluator()
    return report['main/accuracy']
def test_mnist(self, display_log=True):
    """End-to-end ChainerMN smoke test: train an MLP on MNIST on CPU.

    Uses the 'naive' communicator and asserts final validation accuracy
    is at least 0.95.
    """
    # This test file is intended to be run on Travis-CI and
    # GPU is not used for now.
    epoch = 5
    batchsize = 100
    n_units = 100

    comm = chainermn.create_communicator('naive')
    model = L.Classifier(MLP(n_units, 10))

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Rank 0 loads MNIST; all ranks join the collective scatter.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm)
    test = chainermn.scatter_dataset(test, comm)

    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer)
    trainer = training.Trainer(updater, (epoch, 'epoch'))

    # Wrap standard Chainer evaluators by MultiNodeEvaluator.
    evaluator = extensions.Evaluator(test_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0 and display_log:
        trainer.extend(
            extensions.LogReport(trigger=(1, 'epoch')), trigger=(1, 'epoch'))
        trainer.extend(
            extensions.PrintReport(
                ['epoch', 'main/loss', 'validation/main/loss',
                 'main/accuracy', 'validation/main/accuracy', 'elapsed_time'],
                out=sys.stderr),
            trigger=(1, 'epoch'))

    trainer.run()

    # NOTE: this is an accuracy, not an error rate, hence the >= check.
    accuracy = evaluator()['validation/main/accuracy']
    self.assertGreaterEqual(accuracy, 0.95)
def objective(trial, comm):
    """Optuna objective run collectively on all MPI ranks.

    Trains a trial-sampled classifier on an MNIST subset and returns the
    classification error (1 - accuracy), so lower is better.
    """
    # Sample an architecture.
    model = L.Classifier(create_model(trial))

    # Setup optimizer.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)

    # Setup dataset and iterator. Only worker 0 loads the whole dataset:
    # chainermn.scatter_dataset only uses the root rank's data, so loading
    # on every rank (as before) was redundant work and risked concurrent
    # downloads racing on the shared MNIST cache directory.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
        rng = np.random.RandomState(0)
        train = chainer.datasets.SubDataset(
            train, 0, N_TRAIN_EXAMPLES, order=rng.permutation(len(train)))
        test = chainer.datasets.SubDataset(
            test, 0, N_TEST_EXAMPLES, order=rng.permutation(len(test)))
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm)

    train_iter = chainer.iterators.SerialIterator(train, BATCHSIZE, shuffle=True)
    test_iter = chainer.iterators.SerialIterator(
        test, BATCHSIZE, repeat=False, shuffle=False)

    # Setup trainer.
    updater = chainer.training.StandardUpdater(train_iter, optimizer)
    trainer = chainer.training.Trainer(updater, (EPOCH, 'epoch'))
    if comm.rank == 0:
        # Progress output only on one rank to avoid duplicated lines.
        trainer.extend(chainer.training.extensions.ProgressBar())

    # Run training.
    trainer.run()

    # Evaluate with results aggregated across workers.
    evaluator = chainer.training.extensions.Evaluator(test_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    report = evaluator()
    return 1.0 - report['main/accuracy']
def train(self, dataset, comm=None):
    """Train master and per-composition HDNNP models over all sub-datasets.

    For each (training, test) pair in `dataset`, builds a HighDimensionalNNP
    synchronized with the shared MasterNNP, trains it with early stopping,
    and (on MPI rank 0) finally saves the master parameters. Returns the
    shared `result` dict (training time and observations, filled in by the
    Manager context).
    """
    mc = self.model_config
    tc = self.training_config
    if comm is None:
        comm = chainermn.create_communicator('naive', MPI.comm)
    result = {'training_time': 0.0, 'observation': []}

    # Master model and its multi-node optimizer with L1/L2 regularization.
    master_nnp = MasterNNP(
        tc.elements, mc.n_input, mc.hidden_layers, mc.n_output)
    master_opt = chainer.optimizers.Adam(tc.init_lr)
    master_opt = chainermn.create_multi_node_optimizer(master_opt, comm)
    master_opt.setup(master_nnp)
    master_opt.add_hook(chainer.optimizer_hooks.Lasso(tc.l1_norm))
    master_opt.add_hook(chainer.optimizer_hooks.WeightDecay(tc.l2_norm))

    for training, test in dataset:
        tag = training.tag
        properties = training.property.properties

        # Iterators: the global batch size is split evenly across MPI ranks.
        train_iter = chainer.iterators.SerialIterator(
            training, tc.batch_size // MPI.size, repeat=True, shuffle=True)
        test_iter = chainer.iterators.SerialIterator(
            test, tc.batch_size // MPI.size, repeat=False, shuffle=False)

        # Per-composition model, kept in sync with the master parameters.
        hdnnp = HighDimensionalNNP(
            training.elemental_composition,
            mc.n_input, mc.hidden_layers, mc.n_output)
        hdnnp.sync_param_with(master_nnp)
        main_opt = chainer.Optimizer()
        main_opt = chainermn.create_multi_node_optimizer(main_opt, comm)
        main_opt.setup(hdnnp)

        # Loss function selected by the training config.
        _, kwargs = tc.loss_function
        loss_function = self.loss_function(hdnnp, properties, **kwargs)
        observation_keys = loss_function.observation_keys

        # Early stopping monitors the last validation observation key.
        interval = (tc.interval, 'epoch')
        stop_trigger = EarlyStoppingTrigger(
            check_trigger=interval,
            monitor=f'val/main/{observation_keys[-1]}',
            patients=tc.patients, mode='min',
            verbose=self.verbose, max_trigger=(tc.epoch, 'epoch'))

        # Updater drives both optimizers; trainer writes under out_dir/tag.
        updater = Updater(
            train_iter,
            {'main': main_opt, 'master': master_opt},
            loss_func=loss_function.eval)
        out_dir = tc.out_dir / tag
        trainer = chainer.training.Trainer(updater, stop_trigger, out_dir)

        # Learning-rate decay on the master optimizer.
        trainer.extend(ext.ExponentialShift(
            'alpha', 1 - tc.lr_decay, target=tc.final_lr,
            optimizer=master_opt))
        evaluator = chainermn.create_multi_node_evaluator(
            ext.Evaluator(test_iter, hdnnp, eval_func=loss_function.eval),
            comm)
        trainer.extend(evaluator, name='val')
        if tc.scatter_plot:
            trainer.extend(ScatterPlot(test, hdnnp, comm), trigger=interval)
        # Reporting extensions only on rank 0 to avoid duplicated output.
        if MPI.rank == 0:
            if tc.log_report:
                trainer.extend(ext.LogReport(log_name='training.log'))
            if tc.print_report:
                trainer.extend(ext.PrintReport(
                    ['epoch', 'iteration']
                    + [f'main/{key}' for key in observation_keys]
                    + [f'val/main/{key}' for key in observation_keys]))
            if tc.plot_report:
                trainer.extend(ext.PlotReport(
                    [f'main/{key}' for key in observation_keys],
                    x_key='epoch', postprocess=set_log_scale,
                    file_name='training_set.png', marker=None))
                trainer.extend(ext.PlotReport(
                    [f'val/main/{key}' for key in observation_keys],
                    x_key='epoch', postprocess=set_log_scale,
                    file_name='validation_set.png', marker=None))

        # Manager handles snapshot/resume bookkeeping around the run.
        manager = Manager(tag, trainer, result, is_snapshot=True)
        if self.is_resume:
            manager.check_to_resume(self.resume_dir.name)
        if manager.allow_to_run:
            with manager:
                trainer.run()

    if MPI.rank == 0:
        chainer.serializers.save_npz(
            tc.out_dir / 'master_nnp.npz', master_nnp)

    return result
def train(args, train_data, test_data, evaluator_type):
    """Train a Mask R-CNN model, optionally across multiple MPI nodes.

    Validates `args`, configures device/seed/hyper-parameters derived from
    the per-GPU batch size, builds the model, optimizer, iterators and
    trainer extensions (evaluation, snapshots, logging, plotting — the
    latter only on rank 0), then runs training to completion.
    """
    required_args = [
        'dataset',
        'class_names',
        'logs_dir',
        'min_size',
        'max_size',
        'anchor_scales',
    ]
    for arg_key in required_args:
        if not hasattr(args, arg_key):
            raise ValueError(
                'args must contain required key: {}'.format(arg_key)
            )

    assert evaluator_type in ['voc', 'coco'], \
        'Unsupported evaluator_type: {}'.format(evaluator_type)

    # Device selection: per-node intra rank under MPI, else the --gpu flag.
    if args.multi_node:
        import chainermn
        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank
        args.n_node = comm.inter_size
        args.n_gpu = comm.size
        chainer.cuda.get_device_from_id(device).use()
    else:
        if args.gpu is None:
            print(
                'Option --gpu is required without --multi-node.',
                file=sys.stderr,
            )
            sys.exit(1)
        args.n_node = 1
        args.n_gpu = 1
        chainer.cuda.get_device_from_id(args.gpu).use()
        device = args.gpu

    args.seed = 0
    now = datetime.datetime.now()
    args.timestamp = now.isoformat()
    args.out = osp.join(args.logs_dir, now.strftime('%Y%m%d_%H%M%S'))
    args.batch_size = args.batch_size_per_gpu * args.n_gpu

    # Hyper-parameters scaled linearly with the effective batch size.
    # lr: 0.00125 * 8 = 0.01 in original
    args.lr = 0.00125 * args.batch_size
    args.weight_decay = 0.0001

    # lr / 10 at 120k iteration with
    # 160k iteration * 16 batchsize in original
    args.step_size = [
        (120e3 / 180e3) * args.max_epoch,
        (160e3 / 180e3) * args.max_epoch,
    ]

    random.seed(args.seed)
    np.random.seed(args.seed)

    if args.pooling_func == 'align':
        pooling_func = cmr.functions.roi_align_2d
    elif args.pooling_func == 'pooling':
        pooling_func = cmr.functions.roi_pooling_2d
    elif args.pooling_func == 'resize':
        pooling_func = cmr.functions.crop_and_resize
    else:
        raise ValueError(
            'Unsupported pooling_func: {}'.format(args.pooling_func)
        )

    if args.initializer == 'normal':
        mask_initialW = chainer.initializers.Normal(0.01)
    elif args.initializer == 'he_normal':
        mask_initialW = chainer.initializers.HeNormal(fan_option='fan_out')
    else:
        raise ValueError(
            'Unsupported initializer: {}'.format(args.initializer)
        )

    if args.model in ['resnet50', 'resnet101']:
        n_layers = int(args.model.lstrip('resnet'))
        mask_rcnn = cmr.models.MaskRCNNResNet(
            n_layers=n_layers,
            n_fg_class=len(args.class_names),
            pooling_func=pooling_func,
            anchor_scales=args.anchor_scales,
            roi_size=args.roi_size,
            min_size=args.min_size,
            max_size=args.max_size,
            mask_initialW=mask_initialW,
        )
    else:
        raise ValueError('Unsupported model: {}'.format(args.model))
    model = cmr.models.MaskRCNNTrainChain(mask_rcnn)
    if args.multi_node or args.gpu >= 0:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    if args.multi_node:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay))

    if args.model in ['resnet50', 'resnet101']:
        # ResNetExtractor.freeze_at is not enough to freeze params
        # since WeightDecay updates the param little by little.
        mask_rcnn.extractor.conv1.disable_update()
        mask_rcnn.extractor.bn1.disable_update()
        mask_rcnn.extractor.res2.disable_update()
        for link in mask_rcnn.links():
            if isinstance(link, cmr.links.AffineChannel2D):
                link.disable_update()

    train_data = chainer.datasets.TransformDataset(
        train_data,
        cmr.datasets.MaskRCNNTransform(mask_rcnn),
    )
    test_data = chainer.datasets.TransformDataset(
        test_data,
        cmr.datasets.MaskRCNNTransform(mask_rcnn, train=False),
    )
    if args.multi_node:
        # Only rank 0's datasets are scattered; other ranks pass None.
        if comm.rank != 0:
            train_data = None
            test_data = None
        train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True)
        test_data = chainermn.scatter_dataset(test_data, comm)

    # FIXME: MultiProcessIterator sometimes hangs
    train_iter = chainer.iterators.SerialIterator(
        train_data, batch_size=args.batch_size_per_gpu,
    )
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=args.batch_size_per_gpu,
        repeat=False, shuffle=False,
    )

    converter = functools.partial(
        cmr.datasets.concat_examples,
        padding=0,
        # img, bboxes, labels, masks, scales
        indices_concat=[0, 2, 3, 4],  # img, _, labels, masks, scales
        indices_to_device=[0, 1],  # img, bbox
    )
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=device, converter=converter,
    )

    trainer = training.Trainer(
        updater, (args.max_epoch, 'epoch'), out=args.out,
    )

    trainer.extend(
        extensions.ExponentialShift('lr', 0.1),
        trigger=training.triggers.ManualScheduleTrigger(
            args.step_size, 'epoch',
        ),
    )

    eval_interval = 1, 'epoch'
    log_interval = 20, 'iteration'
    plot_interval = 0.1, 'epoch'
    print_interval = 20, 'iteration'

    if evaluator_type == 'voc':
        evaluator = cmr.extensions.InstanceSegmentationVOCEvaluator(
            test_iter, model.mask_rcnn, device=device,
            use_07_metric=True, label_names=args.class_names,
        )
    elif evaluator_type == 'coco':
        evaluator = cmr.extensions.InstanceSegmentationCOCOEvaluator(
            test_iter, model.mask_rcnn, device=device,
            label_names=args.class_names,
        )
    else:
        raise ValueError(
            'Unsupported evaluator_type: {}'.format(evaluator_type)
        )
    if args.multi_node:
        evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=eval_interval)

    # Output extensions only on one worker to avoid duplicated files/logs.
    if not args.multi_node or comm.rank == 0:
        # Save snapshot.
        trainer.extend(
            extensions.snapshot_object(model.mask_rcnn, 'snapshot_model.npz'),
            trigger=training.triggers.MaxValueTrigger(
                'validation/main/map', eval_interval,
            ),
        )

        # Dump params.yaml.
        args.git_hash = cmr.utils.git_hash()
        args.hostname = socket.gethostname()
        trainer.extend(fcn.extensions.ParamsReport(args.__dict__))

        # Visualization.
        trainer.extend(
            cmr.extensions.InstanceSegmentationVisReport(
                test_iter, model.mask_rcnn,
                label_names=args.class_names,
            ),
            trigger=eval_interval,
        )

        # Logging.
        trainer.extend(
            chainer.training.extensions.observe_lr(),
            trigger=log_interval,
        )
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(
            extensions.PrintReport(
                [
                    'iteration',
                    'epoch',
                    'elapsed_time',
                    'lr',
                    'main/loss',
                    'main/roi_loc_loss',
                    'main/roi_cls_loss',
                    'main/roi_mask_loss',
                    'main/rpn_loc_loss',
                    'main/rpn_cls_loss',
                    'validation/main/map',
                ],
            ),
            trigger=print_interval,
        )
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # Plot.
        assert extensions.PlotReport.available()
        trainer.extend(
            extensions.PlotReport(
                [
                    'main/loss',
                    'main/roi_loc_loss',
                    'main/roi_cls_loss',
                    'main/roi_mask_loss',
                    'main/rpn_loc_loss',
                    'main/rpn_cls_loss',
                ],
                file_name='loss.png', trigger=plot_interval,
            ),
            trigger=plot_interval,
        )
        trainer.extend(
            extensions.PlotReport(
                ['validation/main/map'],
                file_name='accuracy.png', trigger=plot_interval,
            ),
            trigger=eval_interval,
        )

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
optimizer = chainermn.create_multi_node_optimizer(chainer.optimizers.MomentumSGD(args.learning_rate), comm) optimizer.setup(model) optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4)) num_loaders = 2 train_iter = chainer.iterators.MultiprocessIterator(train, args.batch_size, n_processes=num_loaders) test_iter = chainer.iterators.MultiprocessIterator(test, args.batch_size, repeat=False, n_processes=num_loaders) # Set up a trainer updater = training.StandardUpdater(train_iter, optimizer, device=device) trainer = training.Trainer(updater, (args.epochs, 'epoch'), out=args.output_data_dir) # Evaluate the model with the test dataset for each epoch evaluator = extensions.Evaluator(test_iter, model, device=device) evaluator = chainermn.create_multi_node_evaluator(evaluator, comm) trainer.extend(evaluator) # Reduce the learning rate by half every 25 epochs. trainer.extend(extensions.ExponentialShift('lr', 0.5), trigger=(25, 'epoch')) # Dump a computational graph from 'loss' variable at the first iteration # The "main" refers to the target link of the "main" optimizer. trainer.extend(extensions.dump_graph('main/loss')) # Write a log of evaluation statistics for each epoch trainer.extend(extensions.LogReport()) if comm.rank == 0: if extensions.PlotReport.available(): trainer.extend( extensions.PlotReport(['main/loss', 'validation/main/loss'],
test_dataset = _preprocess_mnist(test_file, **preprocess_mnist_options) train_iter = chainer.iterators.SerialIterator(train_dataset, args.batch_size) test_iter = chainer.iterators.SerialIterator(test_dataset, args.batch_size, repeat=False, shuffle=False) updater = training.StandardUpdater(train_iter, optimizer, device=device) trainer = training.Trainer(updater, (args.epochs, 'epoch'), out=args.output_data_dir) # Create a multi node evaluator from a standard Chainer evaluator. evaluator = extensions.Evaluator(test_iter, model, device=device) evaluator = chainermn.create_multi_node_evaluator(evaluator, comm) trainer.extend(evaluator) # Some display and output extensions are necessary only for one worker. # (Otherwise, there would just be repeated outputs.) if comm.rank == 0: if extensions.PlotReport.available(): trainer.extend( extensions.PlotReport(['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png')) trainer.extend( extensions.PlotReport( ['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png'))
def main():
    """Entry point for the multi-node seq2seq (WMT en->fr) training example.

    Rank 0 reads and filters the corpora, vocabularies are broadcast to all
    ranks, datasets are scattered, and a ChainerMN trainer runs until the
    stop trigger parsed from --stop (e.g. "15e" or "500i").
    """
    parser = argparse.ArgumentParser(description='Chainer example: seq2seq')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--bleu', action="store_true", default=False,
                        help='Report BLEU score')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--cache', '-c', default=None,
                        help='Directory to cache pre-processed dataset')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1024,
                        help='Number of units')
    parser.add_argument('--communicator', default='hierarchical',
                        help="Type of communicator")
    parser.add_argument('--stop', '-s', type=str, default="15e",
                        help='Stop trigger (ex. "500i", "15e")')
    parser.add_argument('--input', '-i', type=str, default='wmt',
                        help='Input directory')
    parser.add_argument('--optimizer', type=str, default="adam()",
                        help="Optimizer and its argument")
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    args = parser.parse_args()

    # Prepare ChainerMN communicator
    if args.gpu:
        comm = chainermn.create_communicator('hierarchical')
        dev = comm.intra_rank
    else:
        comm = chainermn.create_communicator('naive')
        dev = -1

    if comm.mpi_comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('==========================================')

    # Rank 0 prepares all data
    if comm.rank == 0:
        if args.cache and not os.path.exists(args.cache):
            os.mkdir(args.cache)

        # Read source data
        bt = time.time()
        if args.cache:
            cache_file = os.path.join(args.cache, 'source.pickle')
            source_vocab, source_data = cached_call(
                cache_file, read_source, args.input, args.cache)
        else:
            source_vocab, source_data = read_source(args.input, args.cache)
        et = time.time()
        print("RD source done. {:.3f} [s]".format(et - bt))
        sys.stdout.flush()

        # Read target data
        bt = time.time()
        if args.cache:
            cache_file = os.path.join(args.cache, 'target.pickle')
            target_vocab, target_data = cached_call(
                cache_file, read_target, args.input, args.cache)
        else:
            target_vocab, target_data = read_target(args.input, args.cache)
        et = time.time()
        print("RD target done. {:.3f} [s]".format(et - bt))
        sys.stdout.flush()

        # Drop pairs with empty or overly long (>= 50 token) sentences.
        print('Original training data size: %d' % len(source_data))
        train_data = [
            (s, t)
            for s, t in six.moves.zip(source_data, target_data)
            if 0 < len(s) < 50 and 0 < len(t) < 50
        ]
        print('Filtered training data size: %d' % len(train_data))

        # Development set (newstest2013) becomes the test data.
        en_path = os.path.join(args.input, 'dev', 'newstest2013.en')
        source_data = europal.make_dataset(en_path, source_vocab)
        fr_path = os.path.join(args.input, 'dev', 'newstest2013.fr')
        target_data = europal.make_dataset(fr_path, target_vocab)
        assert(len(source_data) == len(target_data))
        test_data = [
            (s, t)
            for s, t in six.moves.zip(source_data, target_data)
            if 0 < len(s) and 0 < len(t)
        ]

        source_ids = {word: index for index, word in enumerate(source_vocab)}
        target_ids = {word: index for index, word in enumerate(target_vocab)}
    else:
        # target_data, source_data = None, None
        train_data, test_data = None, None
        target_ids, source_ids = None, None

    # Print GPU id (one rank at a time, separated by barriers)
    for i in range(0, comm.size):
        if comm.rank == i:
            print("Rank {} GPU: {}".format(comm.rank, dev))
        sys.stdout.flush()
        comm.mpi_comm.Barrier()

    # broadcast id- > word dictionary
    source_ids = comm.mpi_comm.bcast(source_ids, root=0)
    target_ids = comm.mpi_comm.bcast(target_ids, root=0)

    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    if comm.rank == 0:
        print("target_words : {}".format(len(target_words)))
        print("source_words : {}".format(len(source_words)))

    model = Seq2seq(3, len(source_ids), len(target_ids), args.unit)

    if dev >= 0:
        chainer.cuda.get_device(dev).use()
        model.to_gpu(dev)

    # determine the stop trigger: "<n>e" = epochs, "<n>i" = iterations
    m = re.match(r'^(\d+)e$', args.stop)
    if m:
        trigger = (int(m.group(1)), 'epoch')
    else:
        m = re.match(r'^(\d+)i$', args.stop)
        if m:
            trigger = (int(m.group(1)), 'iteration')
        else:
            if comm.rank == 0:
                sys.stderr.write("Error: unknown stop trigger: {}".format(
                    args.stop))
            exit(-1)

    if comm.rank == 0:
        print("Trigger: {}".format(trigger))

    optimizer = chainermn.create_multi_node_optimizer(
        create_optimizer(args.optimizer), comm)
    optimizer.setup(model)

    # Broadcast dataset
    # Sanity check of train_data
    train_data = chainermn.scatter_dataset(train_data, comm)
    test_data = chainermn.scatter_dataset(test_data, comm)

    train_iter = chainer.iterators.SerialIterator(
        train_data, args.batchsize, shuffle=False)
    updater = training.StandardUpdater(
        train_iter, optimizer, converter=convert, device=dev)
    trainer = training.Trainer(updater, trigger, out=args.out)
    trainer.extend(chainermn.create_multi_node_evaluator(
        BleuEvaluator(model, test_data, device=dev, comm=comm), comm))

    def translate_one(source, target):
        # Translate one sentence and print source/result/expected.
        words = europal.split_sentence(source)
        print('# source : ' + ' '.join(words))
        x = model.xp.array(
            [source_ids.get(w, 1) for w in words], 'i')
        ys = model.translate([x])[0]
        words = [target_words[y] for y in ys]
        print('# result : ' + ' '.join(words))
        print('# expect : ' + target)

    # @chainer.training.make_extension(trigger=(200, 'iteration'))
    def translate(trainer):
        translate_one(
            'Who are we ?',
            'Qui sommes-nous?')
        translate_one(
            'And it often costs over a hundred dollars ' +
            'to obtain the required identity card .',
            'Or, il en coûte souvent plus de cent dollars ' +
            'pour obtenir la carte d\'identité requise.')

        source, target = test_data[numpy.random.choice(len(test_data))]
        source = ' '.join([source_words.get(i, '') for i in source])
        target = ' '.join([target_words.get(i, '') for i in target])
        translate_one(source, target)

    # Reporting only on rank 0 to avoid duplicated output.
    if comm.rank == 0:
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')),
                       trigger=(1, 'epoch'))
        report = extensions.PrintReport(['epoch',
                                         'iteration',
                                         'main/loss',
                                         'main/perp',
                                         'validation/main/bleu',
                                         'elapsed_time'])
        trainer.extend(report, trigger=(1, 'epoch'))

    comm.mpi_comm.Barrier()
    if comm.rank == 0:
        print('start training')
        sys.stdout.flush()

    trainer.run()
def main():
    """Entry point: train an MQAP 3D-CNN model with ChainerMN.

    Parses CLI options, builds a (possibly multi-GPU) communicator, loads one
    JSON config entry, constructs model/optimizer/trainer, optionally resumes
    from the newest snapshot, and runs training.  Rank 0 additionally dumps the
    resolved config/args and protein-name mapping into the output directory.

    Fixes vs. the previous revision:
    - the config file handle was leaked (``open`` without ``close``) — now ``with``;
    - output JSON files now use ``with`` instead of manual ``close()``;
    - the snapshot-number regex is now a raw string (``[\.]`` in a plain string
      is an invalid escape and warns on Python >= 3.12).
    """
    import chainermn
    chainer.global_config.autotune = True

    parser = argparse.ArgumentParser(description='ChainerMN example: Train MQAP using 3DCNN')
    parser.add_argument('--communicator', type=str, default='hierarchical', help='Type of communicator')
    parser.add_argument('--gpu', '-g', action='store_true', help='Use GPU')
    parser.add_argument('--out', '-o', default='result', help='Directory to output the result')
    parser.add_argument('--resume', '-r', action='store_true', help='Resume the training from snapshot')
    parser.add_argument('--weight', '-w', action='store_true', help='Resume only weight')
    parser.add_argument('--config', '-c', type=int, default=0, help='Number of config')
    parser.add_argument('--config_file', type=str, default='./data/config.json', help='Config file path')
    args = parser.parse_args()

    # Choose the communicator: GPU runs need a GPU-capable communicator,
    # CPU-only runs are forced onto 'naive'.
    if args.gpu:
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator, allreduce_grad_dtype='float16')
        device = comm.intra_rank
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1

    # Load the selected entry of the JSON config; close the file promptly.
    with open(args.config_file, 'r') as f:
        config = json.load(f)['Config'][args.config]
    # Each config index gets its own output subdirectory.
    args.out = os.path.join(args.out, str(args.config))

    if comm.rank == 0:
        print('==========================================')
        chainer.print_runtime_info()
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num epoch: {}'.format(config['epoch']))
        print('Batch size: {}'.format(config['batch_size'] * comm.size))
        print('Optimizer: {}'.format(config['optimizer']))
        print('Learning Rate: {}'.format(config['learning_rate']))
        print('Out Directory: {}'.format(args.out))
        print('Vertex feature: {}'.format(config['vertex_feature']))
        if config['global_mode']:
            print('Using Global loss')
        if config['local_mode']:
            print('Using local loss')
            print('Local type : {}'.format(config['local_type']))
            print('Local label : {}'.format(config['local_label']))
        print('==========================================')

    # Per-rank data provider (project-local class; handles its own sharding).
    d = Dataproc(size=comm.size, rank=comm.rank, config=config)
    if device >= 0:
        chainer.cuda.get_device(device).use()
    # sub_comm = comm.split(comm.rank // comm.intra_size, comm.rank)

    # Loss functions: local head is either regression (MSE) or binary
    # classification (sigmoid cross entropy); global head is always MSE.
    if config['local_type'] == 'Regression':
        local_loss_func = F.mean_squared_error
    else:
        local_loss_func = F.sigmoid_cross_entropy
    global_loss_func = F.mean_squared_error

    model = build_model(config=config, comm=comm)
    model = Classifier(predictor=model, local_loss_func=local_loss_func,
                       global_loss_func=global_loss_func, config=config)
    if device >= 0:
        model.to_gpu()

    train, test = d.get_dataset(key='train'), d.get_dataset(key='test')
    train_iter = I.SerialIterator(dataset=train, batch_size=config['batch_size'], repeat=True, shuffle=True)
    test_iter = I.SerialIterator(dataset=test, batch_size=config['batch_size'], repeat=False, shuffle=False)
    # train_iter = I.MultiprocessIterator(dataset=train, batch_size=args.batch, repeat=True, shuffle=True, n_processes=10)
    # test_iter = I.MultiprocessIterator(dataset=test, batch_size=args.batch, repeat=False, shuffle=True, n_processes=10)

    # Wrap the configured optimizer into a multi-node optimizer.
    # NOTE: an unknown config['optimizer'] value leaves `optimizer` unbound
    # and raises NameError at setup() below (pre-existing behavior, kept).
    if config['optimizer'] == 'Adam':
        optimizer = chainer.optimizers.Adam(alpha=config['learning_rate'],
                                            weight_decay_rate=config['weight_decay_rate'], amsgrad=True)
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm, double_buffering=False)
    elif config['optimizer'] == 'MomentumSGD':
        optimizer = chainer.optimizers.MomentumSGD(lr=config['learning_rate'])
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm, double_buffering=False)
    elif config['optimizer'] == 'SMORMS3':
        optimizer = chainer.optimizers.SMORMS3(lr=config['learning_rate'])
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm, double_buffering=False)
    elif config['optimizer'] == 'Eve':
        # Eve ships its own multi-node wrapper.
        from my_optimizer.eve import Eve, create_multi_node_optimizer
        optimizer = Eve(alpha=config['learning_rate'])
        optimizer = create_multi_node_optimizer(optimizer, comm, double_buffering=False)
    elif config['optimizer'] == 'Adabound':
        from my_optimizer.adabound import Adam as Adabound
        optimizer = Adabound(alpha=config['learning_rate'], adabound=True, amsgrad=True,
                             weight_decay_rate=config['weight_decay_rate'])
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm, double_buffering=False)
    optimizer.setup(model)

    val_interval = 1, 'epoch'
    log_interval = 1, 'epoch'

    updater = training.StandardUpdater(train_iter, optimizer, device=device, converter=d.get_converter())
    trainer = training.Trainer(updater, (config['epoch'], 'epoch'), out=args.out)

    # Validation runs through a multi-node evaluator so metrics are averaged
    # across ranks.
    evaluator = GraphEvaluator(iterator=test_iter, target=model.predictor, device=device,
                               converter=d.get_converter(), comm=comm,
                               local_loss_func=local_loss_func, global_loss_func=global_loss_func,
                               name='val', config=config)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)

    # Reporting/snapshot extensions only on rank 0 to avoid duplicated output.
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.snapshot(), trigger=val_interval)
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PlotReport(['main/loss', 'val/main/loss'], 'epoch',
                                             file_name='loss.png'), trigger=val_interval)
        report_list = ['epoch', 'main/loss', 'val/main/loss']
        if config['global_mode']:
            report_list.extend(['main/global_loss', 'val/main/global_loss', 'val/main/global_pearson'])
            trainer.extend(extensions.PlotReport(['main/global_loss', 'val/main/global_loss'], 'epoch',
                                                 file_name='global_loss.png'), trigger=val_interval)
        if config['local_mode']:
            report_list.extend(['main/local_loss', 'val/main/local_loss', 'val/main/local_mean_pearson'])
            if config['local_type'] == 'Classification':
                report_list.append('val/main/local_auc')
                trainer.extend(extensions.PlotReport(['val/main/local_auc'], 'epoch',
                                                     file_name='local_auc.png'), trigger=val_interval)
            else:
                report_list.append('val/main/local_pearson')
        report_list.append('elapsed_time')
        trainer.extend(extensions.PrintReport(report_list), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        # Pick the snapshot with the largest embedded number.  Raw string so
        # the regex escapes are passed through verbatim.
        snap_list = [p for p in os.listdir(args.out) if 'snapshot' in p]
        snap_num = np.array([int(re.findall(r"[+-]?[0-9]+[\.]?[0-9]*[eE]?[+-]?[0-9]*", p)[0])
                             for p in snap_list])
        path = snap_list[np.argmax(snap_num)]
        path = os.path.join(args.out, path)
        if args.weight:
            # Restore only the predictor weights out of the trainer snapshot.
            obj_path = 'updater/model:main/predictor/'
            chainer.serializers.load_npz(path, model.predictor, obj_path)
        else:
            chainer.serializers.load_npz(path, trainer)

    if comm.rank == 0:
        # Persist run metadata next to the training results.
        protein_name_dict = d.get_protein_name_dict()
        out_path = Path(args.out)
        if not out_path.exists():
            out_path.mkdir(parents=True, exist_ok=True)
        np.savez(os.path.join(args.out, 'protein_name'), **protein_name_dict)
        with open(os.path.join(args.out, 'config.json'), 'w') as f:
            json.dump(config, f, ensure_ascii=False, indent=4, sort_keys=True, separators=(',', ': '))
        with open(os.path.join(args.out, 'args.json'), 'w') as f:
            json.dump(vars(args), f)

    if comm.rank == 0:
        print('train start!!!')
    trainer.run()
def main():
    """Train an ImageNet (ILSVRC2012) ResNet classifier with ChainerMN.

    One process per GPU; rank 0 loads/announces shared state, datasets are
    scattered by index, and the LR follows a linear-scaling + warmup schedule.
    """
    # Supported backbones; each entry names the class, the layer whose output
    # is the classification score, and constructor kwargs.
    model_cfgs = {
        'resnet50': {
            'class': ResNet50,
            'score_layer_name': 'fc6',
            'kwargs': {'arch': 'fb'}
        },
        'resnet101': {
            'class': ResNet101,
            'score_layer_name': 'fc6',
            'kwargs': {'arch': 'fb'}
        },
        'resnet152': {
            'class': ResNet152,
            'score_layer_name': 'fc6',
            'kwargs': {'arch': 'fb'}
        }
    }
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')
    parser.add_argument('--model', '-m', choices=model_cfgs.keys(),
                        default='resnet50', help='Convnet models')
    parser.add_argument('--communicator', type=str,
                        default='pure_nccl', help='Type of communicator')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument('--batchsize', type=int, default=32,
                        help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight_decay', type=float, default=0.0001)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int, default=90)
    args = parser.parse_args()

    # Switch to 'forkserver' and spawn one dummy process before creating the
    # communicator — workaround for MultiprocessIterator + MPI crashes.
    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator(args.communicator)
    # One GPU per process: the intra-node rank doubles as the device id.
    device = comm.intra_rank

    if args.lr is not None:
        lr = args.lr
    else:
        # Linear scaling rule: base LR 0.1 at a global batch size of 256.
        lr = 0.1 * (args.batchsize * comm.size) / 256
        if comm.rank == 0:
            print('lr={}: lr is selected based on the linear '
                  'scaling rule'.format(lr))

    label_names = directory_parsing_label_names(args.train)

    model_cfg = model_cfgs[args.model]
    extractor = model_cfg['class'](
        n_class=len(label_names), **model_cfg['kwargs'])
    extractor.pick = model_cfg['score_layer_name']
    model = Classifier(extractor)
    # Following https://arxiv.org/pdf/1706.02677.pdf,
    # the gamma of the last BN of each resblock is initialized by zeros.
    for l in model.links():
        if isinstance(l, Bottleneck):
            l.conv3.bn.gamma.data[:] = 0

    train_data = DirectoryParsingLabelDataset(args.train)
    val_data = DirectoryParsingLabelDataset(args.val)
    train_data = TransformDataset(
        train_data, ('img', 'label'), TrainTransform(extractor.mean))
    val_data = TransformDataset(
        val_data, ('img', 'label'), ValTransform(extractor.mean))
    print('finished loading dataset')

    # Scatter index arrays (not the datasets themselves) from rank 0, then
    # slice each rank's local view out of the full dataset.
    if comm.rank == 0:
        train_indices = np.arange(len(train_data))
        val_indices = np.arange(len(val_data))
    else:
        train_indices = None
        val_indices = None

    train_indices = chainermn.scatter_dataset(
        train_indices, comm, shuffle=True)
    val_indices = chainermn.scatter_dataset(val_indices, comm, shuffle=True)
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(
        val_data, args.batchsize,
        repeat=False, shuffle=False, n_processes=args.loaderjob)

    optimizer = chainermn.create_multi_node_optimizer(
        CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm)
    optimizer.setup(model)
    # No weight decay on BatchNorm scale/offset parameters.
    for param in model.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    if device >= 0:
        chainer.cuda.get_device(device).use()
        model.to_gpu()

    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    @make_shift('lr')
    def warmup_and_exponential_shift(trainer):
        # Linear warmup toward the scaled LR over the first 5 epochs, then
        # step decay at epochs 30/60/80 (standard ImageNet schedule).
        epoch = trainer.updater.epoch_detail
        warmup_epoch = 5
        if epoch < warmup_epoch:
            if lr > 0.1:
                warmup_rate = 0.1 / lr
                rate = warmup_rate \
                    + (1 - warmup_rate) * epoch / warmup_epoch
            else:
                rate = 1
        elif epoch < 30:
            rate = 1
        elif epoch < 60:
            rate = 0.1
        elif epoch < 80:
            rate = 0.01
        else:
            rate = 0.001
        return rate * lr

    trainer.extend(warmup_and_exponential_shift)
    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(val_iter, model, device=device), comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    # Reporting/snapshot extensions only on rank 0 (avoid duplicated output).
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        # Snapshot only the feature extractor, once at the end of training.
        trainer.extend(extensions.snapshot_object(
            extractor, 'snapshot_model_{.updater.epoch}.npz'),
            trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy'
        ]), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def main():
    """Train an ImageNet classifier (alex/googlenet/nin/resnet50) with ChainerMN.

    GPU-only example: datasets are loaded on rank 0 and scattered, training is
    checkpointed via a multi-node checkpointer, and evaluation uses a
    multi-node evaluator.
    """
    # Check if GPU is available
    # (ImageNet example does not support CPU execution)
    if not chainer.cuda.available:
        raise RuntimeError('ImageNet requires GPU support.')

    # Architecture name -> model class (project-local modules).
    archs = {
        'alex': alex.Alex,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.add_argument('--communicator', default='hierarchical')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Start method of multiprocessing module need to be changed if we
    # are using InfiniBand and MultiprocessIterator. This is because
    # processes often crash when calling fork if they are using
    # Infiniband.
    # (c.f., https://www.open-mpi.org/faq/?category=tuning#fork-warning )
    # Also, just setting the start method does not seem to be
    # sufficient to actually launch the forkserver processes, so also
    # start a dummy process.
    # See also our document:
    # https://chainermn.readthedocs.io/en/stable/tutorial/tips_faqs.html#using-multiprocessiterator
    # This must be done *before* ``chainermn.create_communicator``!!!
    multiprocessing.set_start_method('forkserver')
    p = multiprocessing.Process(target=lambda *x: x, args=())
    p.start()
    p.join()

    # Prepare ChainerMN communicator.
    comm = chainermn.create_communicator(args.communicator)
    # One GPU per process: intra-node rank doubles as the device id.
    device = comm.intra_rank

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        print('Using {} communicator'.format(args.communicator))
        print('Using {} arch'.format(args.arch))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    chainer.cuda.get_device_from_id(device).use()  # Make the GPU current
    model.to_gpu()

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    mean = np.load(args.mean)
    if comm.rank == 0:
        train = PreprocessedDataset(args.train, args.root, mean, model.insize)
        val = PreprocessedDataset(
            args.val, args.root, mean, model.insize, False)
    else:
        train = None
        val = None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    val = chainermn.scatter_dataset(val, comm)

    # A workaround for processes crash should be done before making
    # communicator above, when using fork (e.g. MultiProcessIterator)
    # along with Infiniband.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9), comm)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # --test shortens every interval to 10 iterations for smoke testing.
    checkpoint_interval = (10, 'iteration') if args.test else (1, 'epoch')
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    log_interval = (10, 'iteration') if args.test else (1, 'epoch')

    # Multi-node checkpointing: restores trainer/optimizer state if a
    # previous checkpoint of the same run exists.
    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    # Create a multi node evaluator from an evaluator.
    evaluator = TestModeEvaluator(val_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.DumpGraph('main/loss'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'lr'
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train FCIS (ResNet-101) instance segmentation on VOC/SBD with ChainerMN.

    Loads a YAML config, seeds NumPy/CuPy, freezes the lower ResNet stages,
    scatters the dataset from rank 0, and trains with an LR cooldown schedule.
    Rank 0 handles all snapshotting and logging.

    Fixes vs. the previous revision:
    - ``yaml.load(f)`` without a ``Loader`` is deprecated since PyYAML 5.1 and
      a ``TypeError`` on PyYAML >= 6.0; use ``yaml.SafeLoader`` (the config is
      plain scalars/maps, so safe loading suffices);
    - removed a duplicated ``lr = config.lr`` assignment.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--out', '-o', default=None)
    parser.add_argument('--config', default=None)
    parser.add_argument('--resume', default=None)
    args = parser.parse_args()

    # gpu communicator — one GPU per process, device id = intra-node rank.
    comm = chainermn.create_communicator('hierarchical')
    device = comm.intra_rank
    chainer.cuda.get_device_from_id(device).use()

    # Output directory: default to a timestamped folder under <filepath>/out.
    out = args.out
    if out is None:
        timestamp = datetime.datetime.now().strftime('%Y%m%d_%H%M%S')
        out = osp.join(filepath, 'out', timestamp)

    # Config file: default to <filepath>/cfg/train.yaml.
    cfgpath = args.config
    if cfgpath is None:
        cfgpath = osp.join(filepath, 'cfg', 'train.yaml')
    with open(cfgpath, 'r') as f:
        # SafeLoader: required by PyYAML >= 6.0 and avoids arbitrary object
        # construction from the config file.
        config = easydict.EasyDict(yaml.load(f, Loader=yaml.SafeLoader))

    if comm.rank == 0:
        os.makedirs(out)
        # Keep a copy of the config next to the results for reproducibility.
        shutil.copy(cfgpath, osp.join(out, 'train.yaml'))

    min_size = config.min_size
    max_size = config.max_size
    random_seed = config.random_seed
    # Stop condition and cooldown point may be given in epochs or iterations.
    if 'max_epoch' in config:
        max_epoch = config.max_epoch
        max_iter = None
    else:
        max_epoch = None
        max_iter = config.max_iter
    lr = config.lr
    if 'cooldown_epoch' in config:
        cooldown_epoch = config.cooldown_epoch
        cooldown_iter = None
    else:
        cooldown_epoch = None
        cooldown_iter = config.cooldown_iter
    lr_cooldown_factor = config.lr_cooldown_factor

    # set random seed (NOTE: same seed on every rank)
    np.random.seed(random_seed)
    cp.random.seed(random_seed)

    # model
    n_class = len(voc_label_names)
    fcis_model = fcis.models.FCISResNet101(
        n_class,
        ratios=(0.5, 1.0, 2.0),
        anchor_scales=(8, 16, 32),
        rpn_min_size=16)
    if args.resume is None:
        fcis_model.extractor.init_weight()
    else:
        chainer.serializers.load_npz(args.resume, fcis_model)
    model = fcis.models.FCISTrainChain(
        fcis_model, n_sample=128, bg_iou_thresh_lo=0.1)
    model.to_gpu()

    # optimizer
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=lr, momentum=0.9), comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Freeze lower extractor stages (flags are project-defined; res1/res2
    # fully frozen, res3-res5 partially).
    model.fcis.extractor.res1.disable_update(True, True)
    model.fcis.extractor.res2.disable_update(True, True)
    model.fcis.extractor.res3.disable_update(False, True)
    model.fcis.extractor.res4.disable_update(False, True)
    model.fcis.extractor.res5.disable_update(False, True)

    # psroi_conv1 trains at 3x the base learning rate.
    model.fcis.head.psroi_conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.psroi_conv1.b.update_rule.add_hook(GradientScaling(3.0))

    # dataset — only rank 0 loads, then scatter to all ranks.
    if comm.rank == 0:
        if config.use_sbd:
            dataset_class = SBDInstanceSegmentationDataset
        else:
            dataset_class = VOCInstanceSegmentationDataset
        train_dataset = dataset_class(split='train')
        test_dataset = dataset_class(split='val')
        train_dataset = TransformDataset(
            train_dataset, Transform(model.fcis, min_size, max_size))
        test_dataset = TransformDataset(
            test_dataset, Transform(model.fcis, min_size, max_size, flip=False))
    else:
        train_dataset = None
        test_dataset = None
    train_dataset = chainermn.scatter_dataset(
        train_dataset, comm, shuffle=True)
    test_dataset = chainermn.scatter_dataset(test_dataset, comm, shuffle=False)

    # iterator (batch size 1: images have varying shapes)
    train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=1)
    test_iter = chainer.iterators.SerialIterator(
        test_dataset, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=fcis.dataset.concat_examples,
        device=device)

    # interval
    if max_epoch is not None:
        max_interval = max_epoch, 'epoch'
    else:
        max_interval = max_iter, 'iteration'
    if cooldown_epoch is not None:
        cooldown_interval = cooldown_epoch, 'epoch'
    else:
        cooldown_interval = cooldown_iter, 'iteration'
    save_interval = 1, 'epoch'
    log_interval = 100, 'iteration'
    print_interval = 20, 'iteration'
    test_interval = 8, 'epoch'

    # trainer
    trainer = chainer.training.Trainer(updater, max_interval, out=out)

    # lr scheduler: multiply LR by lr_cooldown_factor once, at the cooldown
    # point.
    trainer.extend(chainer.training.extensions.ExponentialShift(
        'lr', lr_cooldown_factor, init=lr),
        trigger=chainer.training.triggers.ManualScheduleTrigger(
            *cooldown_interval))

    # evaluator (multi-node so validation metrics are aggregated over ranks)
    trainer.extend(chainermn.create_multi_node_evaluator(
        chainer.training.extensions.Evaluator(
            test_iter, model, converter=fcis.dataset.concat_examples,
            device=device), comm),
        trigger=test_interval)

    # logging — rank 0 only to avoid duplicated output.
    if comm.rank == 0:
        snapshot_filename = '{}_model_iter_{{.updater.iteration}}.npz'.format(
            model.fcis.__class__.__name__)
        trainer.extend(chainer.training.extensions.snapshot_object(
            model.fcis, savefun=chainer.serializers.save_npz,
            filename=snapshot_filename), trigger=save_interval)
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(
            chainer.training.extensions.LogReport(log_name='log.json',
                                                  trigger=log_interval))
        trainer.extend(chainer.training.extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss', 'main/rpn_loc_loss', 'main/rpn_cls_loss',
            'main/fcis_loc_loss', 'main/fcis_cls_loss', 'main/fcis_mask_loss',
            'main/rpn_acc', 'main/fcis_cls_acc', 'main/fcis_fg_acc',
            'validation/main/rpn_acc',
            'validation/main/fcis_cls_acc', 'validation/main/fcis_fg_acc',
        ]), trigger=print_interval)
        trainer.extend(
            chainer.training.extensions.ProgressBar(update_interval=10))
        trainer.extend(chainer.training.extensions.dump_graph('main/loss'))

    trainer.run()

    if comm.rank == 0:
        print('log is saved in {}'.format(out))
def main():
    """Train an ImageNet classifier with AdaLoss adaptive loss scaling.

    Mixed-precision ChainerMN training: wraps the network in ``AdaLossScaled``,
    records loss-scale/gradient statistics via recorder/monitor hooks, and
    exports them to CSV on rank 0.

    NOTE(review): reads ``args`` and ``_mean`` as module-level globals —
    argument parsing presumably happens outside this function; confirm.
    """
    # Start the multiprocessing environment
    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    # Set up workspace
    # 12 GB GPU RAM for workspace
    chainer.cuda.set_max_workspace_size(16 * 1024 * 1024 * 1024)

    # Setup the multi-node environment
    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank
    print(
        '==> Successfully setup communicator: "{}" rank: {} device: {} size: {}'
        .format(args.communicator, comm.rank, device, comm.size))
    set_random_seed(args, device)

    # Setup LR
    if args.lr is not None:
        lr = args.lr
    else:
        lr = 0.1 * (args.batchsize * comm.size) / 256  # TODO: why?
        if comm.rank == 0:
            print(
                'LR = {} is selected based on the linear scaling rule'.format(
                    lr))

    # Setup dataset
    train_dir = os.path.join(args.dataset_dir, 'train')
    val_dir = os.path.join(args.dataset_dir, 'val')
    label_names = datasets.directory_parsing_label_names(train_dir)
    train_data = datasets.DirectoryParsingLabelDataset(train_dir)
    val_data = datasets.DirectoryParsingLabelDataset(val_dir)
    train_data = TransformDataset(train_data, ('img', 'label'),
                                  TrainTransform(_mean, args))
    val_data = TransformDataset(val_data, ('img', 'label'),
                                ValTransform(_mean, args))
    print('==> [{}] Successfully finished loading dataset'.format(comm.rank))

    # Initializing dataset iterators: scatter index arrays from rank 0 and
    # slice each rank's local shard out of the full dataset.
    if comm.rank == 0:
        train_indices = np.arange(len(train_data))
        val_indices = np.arange(len(val_data))
    else:
        train_indices = None
        val_indices = None
    train_indices = chainermn.scatter_dataset(
        train_indices, comm, shuffle=True)
    val_indices = chainermn.scatter_dataset(val_indices, comm, shuffle=True)
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(
        val_data, args.batchsize,
        repeat=False, shuffle=False, n_processes=args.loaderjob)

    # Create the model
    kwargs = {}
    if args.first_bn_mixed16 and args.dtype == 'float16':
        print('==> Setting the first BN layer to mixed16')
        kwargs['first_bn_mixed16'] = True

    # Initialize the model
    net = models.__dict__[args.arch](n_class=len(label_names), **kwargs)
    # Following https://arxiv.org/pdf/1706.02677.pdf,
    # the gamma of the last BN of each resblock is initialized by zeros.
    for l in net.links():
        if isinstance(l, Bottleneck):
            l.conv3.bn.gamma.data[:] = 0

    # Apply ada loss transform
    recorder = AdaLossRecorder(sample_per_n_iter=100)
    # Update the model to support AdaLoss
    net = AdaLossScaled(
        net,
        init_scale=args.init_scale,
        cfg={
            'loss_scale_method': args.loss_scale_method,
            'scale_upper_bound': args.scale_upper_bound,
            'accum_upper_bound': args.accum_upper_bound,
            'update_per_n_iteration': args.update_per_n_iteration,
            'recorder': recorder,
        },
        transforms=[
            AdaLossTransformLinear(),
            AdaLossTransformBottleneck(),
            AdaLossTransformBasicBlock(),
            AdaLossTransformConv2DBNActiv(),
        ],
        verbose=args.verbose)

    if comm.rank == 0:  # print network only in the 1-rank machine
        print(net)

    net = L.Classifier(net)
    hook = AdaLossMonitor(sample_per_n_iter=100,
                          verbose=args.verbose,
                          includes=['Grad', 'Deconvolution'])

    # Setup optimizer
    optim = chainermn.create_multi_node_optimizer(
        optimizers.CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm)
    if args.dtype == 'mixed16':
        print('==> Using FP32 update for dtype=mixed16')
        optim.use_fp32_update()  # by default use fp32 update

    # HACK: support skipping update by existing loss scaling functionality
    # NOTE(review): placement of this block at function level (outside the
    # mixed16 branch) was inferred from the collapsed source — confirm.
    if args.dynamic_interval is not None:
        optim.loss_scaling(interval=args.dynamic_interval, scale=None)
    else:
        optim.loss_scaling(interval=float('inf'), scale=None)
        optim._loss_scale_max = 1.0  # to prevent actual loss scaling

    optim.setup(net)

    # setup weight decay (skipped for BatchNorm beta/gamma parameters)
    for param in net.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    # allocate model to multiple GPUs
    if device >= 0:
        chainer.cuda.get_device(device).use()
        net.to_gpu()

    # Create an updater that implements how to update based on one train_iter input
    updater = chainer.training.StandardUpdater(train_iter, optim,
                                               device=device)

    # Setup Trainer
    stop_trigger = (args.epoch, 'epoch')
    if args.iter is not None:
        stop_trigger = (args.iter, 'iteration')
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    @make_shift('lr')
    def warmup_and_exponential_shift(trainer):
        """ LR schedule for training ResNet especially.

        NOTE: lr should be within the context.
        """
        epoch = trainer.updater.epoch_detail
        warmup_epoch = 5  # NOTE: mentioned the original ResNet paper.
        if epoch < warmup_epoch:
            if lr > 0.1:
                warmup_rate = 0.1 / lr
                rate = warmup_rate \
                    + (1 - warmup_rate) * epoch / warmup_epoch
            else:
                rate = 1
        elif epoch < 30:
            rate = 1
        elif epoch < 60:
            rate = 0.1
        elif epoch < 80:
            rate = 0.01
        else:
            rate = 0.001
        return rate * lr

    trainer.extend(warmup_and_exponential_shift)
    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(val_iter, net, device=device), comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    # Reporting/snapshot extensions only on rank 0 to avoid duplication.
    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        print('Using {} communicator'.format(args.communicator))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)

        # NOTE: may take snapshot every iteration now
        snapshot_label = 'epoch' if args.iter is None else 'iteration'
        snapshot_trigger = (args.snapshot_freq, snapshot_label)
        snapshot_filename = ('snapshot_' + snapshot_label + '_{.updater.'
                             + snapshot_label + '}.npz')
        trainer.extend(extensions.snapshot(filename=snapshot_filename),
                       trigger=snapshot_trigger)

        trainer.extend(extensions.LogReport(trigger=log_interval))
        # Expose the optimizer's private loss scale in the logs.
        trainer.extend(extensions.observe_value(
            'loss_scale',
            lambda trainer: trainer.updater.get_optimizer('main')._loss_scale),
            trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr', 'loss_scale',
            'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy'
        ]), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    # Give the hooks access to the trainer so they can tag their samples.
    recorder.trainer = trainer
    hook.trainer = trainer
    # The monitor hook is only active on rank 0.
    with ExitStack() as stack:
        if comm.rank == 0:
            stack.enter_context(hook)
        trainer.run()

    # store recorded results
    if comm.rank == 0:  # NOTE: only export in the first rank
        recorder.export().to_csv(os.path.join(args.out, 'loss_scale.csv'))
        hook.export_history().to_csv(
            os.path.join(args.out, 'grad_stats.csv'))
def main():
    """Entry point: fault-tolerant distributed MNIST training with echainer.

    Unlike a plain ChainerMN job, the communicator's membership may change
    at runtime (nodes joining/leaving via etcd).  Training therefore runs
    inside a retry loop: on a cluster-change exception the optimizer/model
    state is checkpointed into the communicator, the cluster re-syncs, and
    the whole trainer pipeline is rebuilt from the recovered state.
    """
    parser = argparse.ArgumentParser(description='ChainerMN example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    parser.add_argument('--np', '-n', type=int, required=True,
                        help='Minimum number of processes')
    parser.add_argument('--bind', '-p', type=str, required=True,
                        help='address to bind gRPC server')
    parser.add_argument('--etcd', '-c', type=str,
                        default='etcd://127.0.0.1:2379/train_mnist.py',
                        help='etcd location and path')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.  MinMaxPolicy(n, n, ...) pins the
    # cluster to exactly args.np processes, blocking until all have joined.
    n = args.np
    bind = args.bind
    scale_policy = MinMaxPolicy(n, n, block=True)
    comm = None
    if args.gpu:
        from echainer import NcclCommunicator
        comm = NcclCommunicator(policy=scale_policy, bind=bind,
                                etcd=args.etcd)
    else:
        comm = MetaCommunicator(policy=scale_policy, bind=bind,
                                etcd=args.etcd)

    # True when this process joined an already-running cluster and must
    # fetch the current training state instead of starting from scratch.
    late = not comm.initial

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPU ', comm.intra_rank)
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    device = -1
    model = L.Classifier(MLP(args.unit, 10))
    if args.gpu:
        device = comm.intra_rank
        model.to_gpu(device=device)
        print('Using GPU ', device)

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    done = False
    retry = False
    while not done:
        # After a cluster change the intra-node rank (and thus GPU
        # assignment) may differ, so re-place the model and rebuild the
        # multi-node optimizer against the new communicator state.
        if args.gpu and retry:
            device = comm.intra_rank
            print('Using GPU No.', comm.intra_rank)
            model.to_gpu(device=device)
            optimizer = chainermn.create_multi_node_optimizer(
                chainer.optimizers.Adam(), comm)
            optimizer.setup(model)

        # Split and distribute the dataset. Only worker 0 loads the whole
        # dataset.  Datasets of worker 0 are evenly split and distributed
        # to all workers.
        print('get dataset')
        if comm.rank == 0:
            train, test = chainer.datasets.get_mnist()
        else:
            train, test = None, None
        print('scatter dataset')
        train = chainermn.scatter_dataset(train, comm, shuffle=True)
        test = chainermn.scatter_dataset(test, comm, shuffle=True)
        print('create iterator')
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                     repeat=False,
                                                     shuffle=False)

        updater = training.StandardUpdater(train_iter, optimizer,
                                           device=device)
        trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                                   out=args.out)

        # Create a multi node evaluator from a standard Chainer evaluator.
        evaluator = extensions.Evaluator(test_iter, model, device=device)
        evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
        trainer.extend(evaluator)
        # echainer-specific extensions: cluster-reform handling, liveness
        # monitoring, and a one-shot uninitializer hook.
        trainer.extend(ReformHandler())
        trainer.extend(comm.get_monitor())
        trainer.extend(comm.get_uninitializer(), trigger=(1, 'iteration'))

        # Some display and output extensions are necessary only for one
        # worker.  (Otherwise, there would just be repeated outputs.)
        if comm.rank == 0:
            trainer.extend(extensions.dump_graph('main/loss'))
            trainer.extend(echainer.extension.Lineage(comm))
            trainer.extend(
                extensions.PrintReport([
                    'epoch', 'main/loss', 'validation/main/loss',
                    'main/accuracy', 'validation/main/accuracy',
                    'elapsed_time'
                ], log_report='Lineage'))
            trainer.extend(extensions.ProgressBar())

        # Register extension to save trainer's progress (iteration) in
        # communicator
        # trainer.extend(comm.get_progress_updater())
        if args.resume:
            chainer.serializers.load_npz(args.resume, trainer)

        # Optimizer includes model parameters and other params in optimizer.
        comm.register_state('optimizer', optimizer)
        comm.register_state('model', model)
        # Iterators are deliberately NOT registered: if the number of nodes
        # changed, the saved per-node cursor position would be wrong anyway,
        # so recovering iterator state would be meaningless.
        # The Trainer itself is too large to register and contains the
        # iterators.
        print(updater.epoch, updater.iteration)
        if retry or late:
            # Pull the latest checkpointed state from the cluster, then fix
            # up the iterator/updater counters to resume where we left off.
            (iteration, epoch) = comm.fetch_state('optimizer', optimizer)
            (iteration, epoch) = comm.fetch_state('model', model)
            train_iter.epoch = epoch
            updater.iteration = iteration
            optimizers = trainer.updater.get_all_optimizers()
            # bcast again anyway so every node starts from identical params
            for name in optimizers.keys():
                optimizers[name].reset_prev_params()

        try:
            print('start trainer.run(), ', trainer.updater.iteration,
                  trainer.updater.epoch)
            trainer.run()
            done = trainer._done
        except CommException as ce:
            print(">>>>>>>>>>>", ce, updater.iteration, updater.epoch)
            comm.save_all_states(updater.iteration, updater.epoch)
            # Here comm will be ready to accept fetch-state calls; once all
            # nodes have caught up it returns and training continues: TODO
            comm.sync_cluster(trainer.updater.get_all_optimizers())
            retry = True
            continue
        except ClusterUpdatedException as ce:
            print(">>>>>>>>>>>", ce)
            comm.save_all_states(updater.iteration, updater.epoch)
            comm.sync_cluster(trainer.updater.get_all_optimizers())
            retry = True
            continue
        except Exception as e:
            # NOTE(review): any unexpected error silently ends training for
            # this process — consider re-raising after cleanup.
            print("Unexpected >>>>>>>>>>>", e)
            break

    # TODO: this should be called cleanly, unless it runs forever somehow...
    comm.leave()
def get_trainer(args):
    """Build a ChainerMN :class:`~chainer.training.Trainer` from a YAML config.

    Reads ``args.config``, sets up the communicator, model, optimizer,
    scattered datasets, iterators, updater, and all trainer extensions
    declared in the config, then returns the ready-to-run trainer.

    Args:
        args: Parsed CLI namespace.  Used fields: ``config`` (YAML path),
            ``gpu`` (bool), ``communicator`` (str), ``result_dir``
            (str or None), ``resume`` (snapshot path or None).

    Returns:
        chainer.training.Trainer: fully configured trainer.
    """
    # Fix: the original `yaml.load(open(args.config))` leaked the file
    # handle and omitted the Loader argument, which is required on
    # PyYAML >= 6.0 and unsafe (arbitrary object construction) on older
    # versions.  SafeLoader matches the other entry point in this file.
    # NOTE(review): assumes configs use only plain YAML types (no
    # python-specific tags) — confirm against existing config files.
    with open(args.config) as f:
        config = yaml.load(f, Loader=yaml.SafeLoader)

    # Set workspace size (cuDNN scratch memory) if requested.
    if 'max_workspace_size' in config:
        chainer.cuda.set_max_workspace_size(config['max_workspace_size'])

    # Prepare ChainerMN communicator.
    if args.gpu:
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
        device = comm.intra_rank
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1

    # Show the setup information (rank 0 only, to avoid duplicate output).
    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(
            MPI.COMM_WORLD.Get_size()))
        if args.gpu:
            print('Using GPUs - max workspace size:',
                  chainer.cuda.get_max_workspace_size())
        print('Using {} communicator'.format(args.communicator))

    # Output version info.
    if comm.rank == 0:
        print('Chainer version: {}'.format(chainer.__version__))
        print('ChainerMN version: {}'.format(chainermn.__version__))
        print('cuda: {}, cudnn: {}'.format(
            chainer.cuda.available, chainer.cuda.cudnn_enabled))

    # Create result_dir.  When an explicit result_dir is given, the model
    # module is loaded from inside it (supports resuming a frozen copy).
    if args.result_dir is not None:
        config['result_dir'] = args.result_dir
        model_fn = config['model']['module'].split('.')[-1]
        sys.path.insert(0, args.result_dir)
        config['model']['module'] = model_fn
    else:
        config['result_dir'] = create_result_dir_from_config_path(args.config)
    log_fn = save_config_get_log_fn(config['result_dir'], args.config)
    if comm.rank == 0:
        print('result_dir:', config['result_dir'])

    # Instantiate model.
    model = get_model_from_config(config, comm)
    if args.gpu:
        chainer.cuda.get_device(device).use()
        model.to_gpu()
    if comm.rank == 0:
        print('model:', model.__class__.__name__)

    # Initialize optimizer and wrap it for multi-node gradient exchange.
    optimizer = get_optimizer_from_config(model, config)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    if comm.rank == 0:
        print('optimizer:', optimizer.__class__.__name__)

    # Setting up datasets.  Only rank 0 loads them; scatter_dataset splits
    # them across all workers (non-root ranks pass empty placeholders).
    if comm.rank == 0:
        train_dataset, valid_dataset = get_dataset_from_config(config)
        print('train_dataset: {}'.format(len(train_dataset)),
              train_dataset.__class__.__name__)
        print('valid_dataset: {}'.format(len(valid_dataset)),
              valid_dataset.__class__.__name__)
    else:
        train_dataset, valid_dataset = [], []
    train_dataset = chainermn.scatter_dataset(train_dataset, comm)
    valid_dataset = chainermn.scatter_dataset(valid_dataset, comm)

    # Create iterators.
    # multiprocessing.set_start_method('forkserver')
    train_iter, valid_iter = create_iterators(train_dataset, valid_dataset,
                                              config)
    if comm.rank == 0:
        print('train_iter:', train_iter.__class__.__name__)
        print('valid_iter:', valid_iter.__class__.__name__)

    # Create updater: either a custom creator named in the config or the
    # default StandardUpdater-style factory.
    if 'updater_creator' in config:
        updater_creator = get_updater_creator_from_config(config)
        updater = updater_creator(train_iter, optimizer, device=device)
    else:
        updater = create_updater(train_iter, optimizer, device=device)
    if comm.rank == 0:
        print('updater:', updater.__class__.__name__)

    # Create Trainer.
    trainer = training.Trainer(updater, config['stop_trigger'],
                               out=config['result_dir'])
    if comm.rank == 0:
        print('Trainer stops:', config['stop_trigger'])

    # Trainer extensions.  Display/output extensions run on rank 0 only;
    # the Evaluator runs on every rank (it is a collective operation).
    for ext in config['trainer_extension']:
        ext, values = ext.popitem()
        if ext == 'LogReport' and comm.rank == 0:
            trigger = values['trigger']
            trainer.extend(
                extensions.LogReport(trigger=trigger, log_name=log_fn))
        elif ext == 'observe_lr' and comm.rank == 0:
            trainer.extend(extensions.observe_lr(),
                           trigger=values['trigger'])
        elif ext == 'dump_graph' and comm.rank == 0:
            trainer.extend(extensions.dump_graph(**values))
        elif ext == 'Evaluator':
            assert 'module' in values
            mod = import_module(values['module'])
            evaluator = getattr(mod, values['name'])
            if evaluator is extensions.Evaluator:
                evaluator = evaluator(valid_iter, model, device=device)
            else:
                # Custom evaluators receive the bare predictor.
                evaluator = evaluator(valid_iter, model.predictor)
            evaluator = chainermn.create_multi_node_evaluator(evaluator,
                                                              comm)
            trainer.extend(evaluator, trigger=values['trigger'],
                           name=values['prefix'])
        elif ext == 'PlotReport' and comm.rank == 0:
            trainer.extend(extensions.PlotReport(**values))
        elif ext == 'PrintReport' and comm.rank == 0:
            trigger = values.pop('trigger')
            trainer.extend(extensions.PrintReport(**values),
                           trigger=trigger)
        elif ext == 'ProgressBar' and comm.rank == 0:
            upd_int = values['update_interval']
            trigger = values['trigger']
            trainer.extend(extensions.ProgressBar(update_interval=upd_int),
                           trigger=trigger)
        elif ext == 'snapshot' and comm.rank == 0:
            filename = values['filename']
            trigger = values['trigger']
            trainer.extend(extensions.snapshot(filename=filename),
                           trigger=trigger)

    # LR decay: stepwise drop at manually scheduled points.
    if 'lr_drop_ratio' in config['optimizer'] \
            and 'lr_drop_triggers' in config['optimizer']:
        ratio = config['optimizer']['lr_drop_ratio']
        points = config['optimizer']['lr_drop_triggers']['points']
        unit = config['optimizer']['lr_drop_triggers']['unit']
        drop_trigger = triggers.ManualScheduleTrigger(points, unit)

        def lr_drop(trainer):
            # Multiply the main optimizer's lr in place at each trigger.
            trainer.updater.get_optimizer('main').lr *= ratio

        trainer.extend(lr_drop, trigger=drop_trigger)

    # LR decay: polynomial schedule applied every iteration.
    if 'lr_drop_poly_power' in config['optimizer']:
        power = config['optimizer']['lr_drop_poly_power']
        stop_trigger = config['stop_trigger']
        batchsize = train_iter.batch_size
        len_dataset = len(train_dataset)
        trainer.extend(PolynomialShift('lr', power, stop_trigger,
                                       batchsize, len_dataset),
                       trigger=(1, 'iteration'))

    # Resume from a trainer snapshot, if given.
    if args.resume is not None:
        # fn = '{}.bak'.format(args.resume)
        # shutil.copy(args.resume, fn)
        serializers.load_npz(args.resume, trainer)
        if comm.rank == 0:
            print('Resumed from:', args.resume)

    if comm.rank == 0:
        print('==========================================')

    return trainer
def check_mnist(gpu, display_log=True):
    """Smoke-test distributed MNIST training end to end.

    Trains a small MLP for a few epochs across all ranks of a 'naive'
    communicator, asserts that validation accuracy exceeds 0.95, and
    verifies that the multi-node checkpointer cleans up its snapshot
    directory on successful completion.
    """
    n_epoch = 5
    n_batch = 100
    n_hidden = 100

    comm = chainermn.create_communicator('naive')
    device = comm.intra_rank if gpu else -1
    if gpu:
        chainer.cuda.get_device_from_id(device).use()

    model = L.Classifier(MLP(n_hidden, 10))
    if gpu:
        model.to_gpu()

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Rank 0 loads the whole dataset; every rank then receives an equal
    # shard via scatter_dataset.
    train, test = (chainer.datasets.get_mnist() if comm.rank == 0
                   else (None, None))
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, n_batch)
    test_iter = chainer.iterators.SerialIterator(
        test, n_batch, repeat=False, shuffle=False)

    trainer = training.Trainer(
        training.StandardUpdater(train_iter, optimizer, device=device),
        (n_epoch, 'epoch'))

    # Wrap the standard Chainer evaluator so validation is aggregated
    # across all ranks.
    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(test_iter, model, device=device), comm)
    trainer.extend(evaluator)

    # The checkpointer is attached purely to verify that checkpointing
    # runs without errors.
    path = tempfile.mkdtemp(dir='/tmp', prefix=__name__ + "-tmp-")
    trainer.extend(
        create_multi_node_checkpointer(name=__name__, comm=comm, path=path),
        trigger=(1, 'epoch'))

    # Console reporting happens on rank 0 only, to avoid duplicated logs.
    if comm.rank == 0 and display_log:
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')),
                       trigger=(1, 'epoch'))
        trainer.extend(
            extensions.PrintReport(['epoch', 'main/loss',
                                    'validation/main/loss', 'main/accuracy',
                                    'validation/main/accuracy',
                                    'elapsed_time'],
                                   out=sys.stderr),
            trigger=(1, 'epoch'))

    trainer.run()

    accuracy = evaluator()['validation/main/accuracy']
    assert accuracy > 0.95

    # A successfully finalized checkpointer must leave its snapshot
    # directory empty.
    assert [] == os.listdir(path)
    os.removedirs(path)
def main():
    """Train Mask R-CNN on COCO (train + valminusminival), optionally
    multi-node via ChainerMN.

    Hyper-parameters follow the original Mask R-CNN schedule, linearly
    scaled by the number of GPUs (1 image per GPU).
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('--model', '-m',
                        choices=['vgg16', 'resnet50', 'resnet101'],
                        default='resnet50', help='base model')
    parser.add_argument('--pooling-func', '-p',
                        choices=['pooling', 'align', 'resize'],
                        default='align', help='pooling function')
    parser.add_argument('--gpu', '-g', type=int, help='GPU id.')
    parser.add_argument('--multi-node', '-n', action='store_true',
                        help='use multi node')
    parser.add_argument('--roi-size', '-r', type=int, default=7,
                        help='roi size')
    args = parser.parse_args()

    if args.multi_node:
        import chainermn
        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank
        args.n_node = comm.inter_size
        args.n_gpu = comm.size
        chainer.cuda.get_device_from_id(device).use()
    else:
        if args.gpu is None:
            print('Option --gpu is required without --multi-node.',
                  file=sys.stderr)
            quit(1)
        args.n_node = 1
        args.n_gpu = 1
        chainer.cuda.get_device_from_id(args.gpu).use()
        device = args.gpu

    args.seed = 0
    now = datetime.datetime.now()
    args.timestamp = now.isoformat()
    args.out = osp.join(here, 'logs', now.strftime('%Y%m%d_%H%M%S'))

    # Linear-scaling rule: 0.00125 * 8 = 0.01 in the original paper setup.
    args.batch_size = 1 * args.n_gpu
    args.lr = 0.00125 * args.batch_size
    args.weight_decay = 0.0001

    # (180e3 * 8) / len(coco_trainval)
    args.max_epoch = (180e3 * 8) / 118287
    # lr / 10 at 120k iteration with
    # 160k iteration * 16 batchsize in original
    args.step_size = [(120e3 / 180e3) * args.max_epoch,
                      (160e3 / 180e3) * args.max_epoch]

    random.seed(args.seed)
    np.random.seed(args.seed)

    args.dataset = 'coco'
    train_data = chainer.datasets.ConcatenatedDataset(
        mrcnn.datasets.COCOInstanceSegmentationDataset('train'),
        mrcnn.datasets.COCOInstanceSegmentationDataset('valminusminival'),
    )
    test_data = mrcnn.datasets.COCOInstanceSegmentationDataset(
        'minival', use_crowd=True, return_crowd=True, return_area=True)
    class_names = test_data.class_names

    train_data = MaskRCNNDataset(train_data)
    test_data = MaskRCNNDataset(test_data)

    # Select the RoI feature-extraction function.
    if args.pooling_func == 'align':
        pooling_func = mrcnn.functions.roi_align_2d
    elif args.pooling_func == 'pooling':
        pooling_func = chainer.functions.roi_pooling_2d
    elif args.pooling_func == 'resize':
        pooling_func = mrcnn.functions.crop_and_resize
    else:
        raise ValueError

    min_size = 800
    max_size = 1333
    anchor_scales = (2, 4, 8, 16, 32)

    if args.model == 'vgg16':
        mask_rcnn = mrcnn.models.MaskRCNNVGG16(
            n_fg_class=len(class_names),
            pretrained_model='imagenet',
            pooling_func=pooling_func,
            anchor_scales=anchor_scales,
            min_size=min_size,
            max_size=max_size,
            roi_size=args.roi_size,
        )
    elif args.model in ['resnet50', 'resnet101']:
        n_layers = int(args.model.lstrip('resnet'))
        mask_rcnn = mrcnn.models.MaskRCNNResNet(
            n_layers=n_layers,
            n_fg_class=len(class_names),
            pretrained_model='imagenet',
            pooling_func=pooling_func,
            anchor_scales=anchor_scales,
            min_size=min_size,
            max_size=max_size,
            roi_size=args.roi_size,
        )
    else:
        raise ValueError
    mask_rcnn.use_preset('evaluate')
    model = mrcnn.models.MaskRCNNTrainChain(mask_rcnn)
    if args.multi_node or args.gpu >= 0:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    if args.multi_node:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    optimizer.add_hook(
        chainer.optimizer.WeightDecay(rate=args.weight_decay))

    # Freeze early ResNet stages (standard Mask R-CNN fine-tuning recipe).
    if args.model in ['resnet50', 'resnet101']:
        model.mask_rcnn.extractor.mode = 'res3+'
        mask_rcnn.extractor.conv1.disable_update()
        mask_rcnn.extractor.bn1.disable_update()
        mask_rcnn.extractor.res2.disable_update()

    train_data = chainer.datasets.TransformDataset(
        train_data, mrcnn.datasets.MaskRCNNTransform(mask_rcnn))
    test_data = chainer.datasets.TransformDataset(
        test_data, mrcnn.datasets.MaskRCNNTransform(mask_rcnn, train=False))
    if args.multi_node:
        # Only rank 0 keeps the datasets; scatter splits them evenly.
        if comm.rank != 0:
            train_data = None
            test_data = None
        train_data = chainermn.scatter_dataset(train_data, comm,
                                               shuffle=True)
        test_data = chainermn.scatter_dataset(test_data, comm)

    # FIXME: MultiProcessIterator sometimes hangs
    train_iter = chainer.iterators.SerialIterator(
        train_data, batch_size=1)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=device,
        converter=mrcnn.datasets.concat_examples)

    trainer = training.Trainer(
        updater, (args.max_epoch, 'epoch'), out=args.out)

    # Step-wise lr decay at the two scheduled epochs.
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1),
        trigger=training.triggers.ManualScheduleTrigger(
            args.step_size, 'epoch'))

    eval_interval = 1, 'epoch'
    log_interval = 20, 'iteration'
    plot_interval = 0.1, 'epoch'
    print_interval = 20, 'iteration'

    evaluator = mrcnn.extensions.InstanceSegmentationCOCOEvaluator(
        test_iter, model.mask_rcnn, device=device,
        label_names=class_names)
    if args.multi_node:
        evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=eval_interval)

    # Snapshotting, logging, plotting: single-rank only to avoid
    # duplicated output in the multi-node case.
    if not args.multi_node or comm.rank == 0:
        trainer.extend(
            extensions.snapshot_object(
                model.mask_rcnn, 'snapshot_model.npz'),
            trigger=training.triggers.MaxValueTrigger(
                'validation/main/map', eval_interval))
        args.git_hash = mrcnn.utils.git_hash()
        args.hostname = socket.gethostname()
        trainer.extend(fcn.extensions.ParamsReport(args.__dict__))
        trainer.extend(
            mrcnn.extensions.InstanceSegmentationVisReport(
                test_iter, model.mask_rcnn,
                label_names=class_names),
            trigger=eval_interval)
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport(
            ['iteration', 'epoch', 'elapsed_time', 'lr',
             'main/loss', 'main/roi_loc_loss', 'main/roi_cls_loss',
             'main/roi_mask_loss', 'main/rpn_loc_loss',
             'main/rpn_cls_loss', 'validation/main/map']),
            trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # plot
        assert extensions.PlotReport.available()
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'main/roi_loc_loss', 'main/roi_cls_loss',
                 'main/roi_mask_loss', 'main/rpn_loc_loss',
                 'main/rpn_cls_loss'],
                file_name='loss.png', trigger=plot_interval
            ),
            trigger=plot_interval,
        )
        trainer.extend(
            extensions.PlotReport(
                ['validation/main/map'],
                file_name='accuracy.png', trigger=plot_interval
            ),
            trigger=eval_interval,
        )

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train a classifier described by a YAML config with ChainerMN.

    Rank 0 loads and splits the dataset; all ranks receive equal shards.
    Uses a fixed stepwise lr schedule (x0.1 at 30%/60%/80% of the epochs)
    and snapshots the best model by validation accuracy.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_path', type=str,
                        default='configs/base.yml',
                        help='path to config file')
    parser.add_argument('--results_dir', type=str, default='./result/',
                        help='directory to save the results to')
    parser.add_argument('--resume', type=str, default='',
                        help='path to the snapshot')
    parser.add_argument('--process_num', type=int, default=0)
    parser.add_argument('--seed', type=int, default=42)
    args = parser.parse_args()

    config = yaml_utils.Config(
        yaml.load(open(args.config_path), Loader=yaml.SafeLoader))
    # Output subdirectory name: <pattern>-<classifier>-<dataset>.
    pattern = "-".join([
        config.pattern, config.models['classifier']['name'],
        config.dataset['dataset_name']
    ])
    comm = chainermn.create_communicator()
    device = comm.intra_rank
    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(
            MPI.COMM_WORLD.Get_size()))
        print('Num Minibatch-size: {}'.format(config.batchsize))
        print('Num Epoch: {}'.format(config.epoch))
        print('==========================================')

    # Model
    classifier = load_models(config.models['classifier'])
    if args.resume:
        print("Resume training with snapshot:{}".format(args.resume))
        chainer.serializers.load_npz(args.resume, classifier)
    chainer.cuda.get_device_from_id(device).use()
    classifier.to_gpu()
    # models = {"classifier": classifier}

    # Optimizer
    opt = make_optimizer(classifier, comm, config)
    opt.add_hook(chainer.optimizer.WeightDecay(5e-4))

    # Dataset: rank 0 loads and does the random train/val split; other
    # ranks only import the dataset module (side effects) and receive
    # their shard via scatter_dataset.
    if comm.rank == 0:
        dataset = yaml_utils.load_dataset(config)
        first_size = int(len(dataset) * config.train_val_split_ratio)
        train, val = chainer.datasets.split_dataset_random(
            dataset, first_size, seed=args.seed)
    else:
        yaml_utils.load_module(config.dataset['dataset_func'],
                               config.dataset['dataset_name'])
        train, val = None, None
    train = chainermn.scatter_dataset(train, comm)
    val = chainermn.scatter_dataset(val, comm)

    # Iterator
    train_iterator = chainer.iterators.SerialIterator(train,
                                                      config.batchsize)
    val_iterator = chainer.iterators.SerialIterator(val, config.batchsize,
                                                    repeat=False,
                                                    shuffle=False)

    # Updater: class and extra kwargs come from the config.
    kwargs = config.updater['args'] if 'args' in config.updater else {}
    kwargs.update({
        'classifier': classifier,
        'iterator': train_iterator,
        'optimizer': opt,
        'device': device,
    })
    updater = yaml_utils.load_updater_class(config)
    updater = updater(**kwargs)
    out = args.results_dir + '/' + pattern
    if comm.rank == 0:
        create_result_dir(out, args.config_path, config)

    # Trainer
    trainer = training.Trainer(updater, (config.epoch, 'epoch'), out=out)

    # Evaluator (collective: runs on every rank)
    evaluator = ClassifierEvaluator(val_iterator, classifier,
                                    device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Learning Rate Schedule (fixed): x0.1 at 30%, 60%, 80% of training.
    schedule = [config.epoch * 0.3, config.epoch * 0.6, config.epoch * 0.8]
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=ManualScheduleTrigger(schedule, 'epoch'))

    report_keys = [
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy', 'elapsed_time'
    ]
    if comm.rank == 0:
        # Set up logging; snapshot keeps only the best-accuracy model.
        trainer.extend(extensions.snapshot_object(
            classifier, 'classifier{}.npz'.format(args.process_num)),
            trigger=MaxValueTrigger('validation/main/accuracy'))
        trainer.extend(
            extensions.LogReport(keys=report_keys,
                                 trigger=(config.display_interval,
                                          'epoch')))
        trainer.extend(extensions.PrintReport(report_keys),
                       trigger=(config.display_interval, 'epoch'))
        trainer.extend(
            extensions.ProgressBar(
                update_interval=config.progressbar_interval))

    # Run the training
    trainer.run()
def main():
    """ChainerMN example: MNIST with a pipelined (model-parallel) MLP.

    Each pair of processes forms a model pipeline: the communicator is
    split into a data-parallel axis (``data_comm``) and a model-parallel
    axis (``model_comm``), and the MLP is cut in two (MLP0 on one half of
    each pair, MLP1 on the other).  Requires an even process count.
    """
    parser = argparse.ArgumentParser(
        description='ChainerMN example: pipelined neural network')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    # data_axis (rank % 2) selects which half of the model this process
    # hosts; model_axis (rank // 2) identifies its pipeline pair.  Splitting
    # by one axis groups processes that share the other.
    if args.gpu:
        comm = chainermn.create_communicator('hierarchical')
        data_axis, model_axis = comm.rank % 2, comm.rank // 2
        data_comm = comm.split(data_axis, comm.rank)
        model_comm = comm.split(model_axis, comm.rank)
        device = comm.intra_rank
    else:
        comm = chainermn.create_communicator('naive')
        data_axis, model_axis = comm.rank % 2, comm.rank // 2
        data_comm = comm.split(data_axis, comm.rank)
        model_comm = comm.split(model_axis, comm.rank)
        device = -1

    if model_comm.size != 2:
        raise ValueError(
            'This example can only be executed on the even number'
            'of processes.')

    if comm.rank == 0:
        print('==========================================')
        if args.gpu:
            print('Using GPUs')
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    # First pipeline stage holds the input half (with classifier wrapper
    # upstream losses flow through); second stage holds the output half.
    if data_axis == 0:
        model = L.Classifier(MLP0(model_comm, args.unit))
    elif data_axis == 1:
        model = MLP1(model_comm, args.unit, 10)

    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    # Gradients are exchanged only along the data-parallel axis.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), data_comm)
    optimizer.setup(model)

    # Original dataset on worker 0 and 1.
    # Datasets of worker 0 and 1 are split and distributed to all workers.
    # The second stage feeds on empty datasets so its iterators stay in
    # lock-step with the first stage without duplicating data.
    if model_axis == 0:
        train, test = chainer.datasets.get_mnist()
        if data_axis == 1:
            train = chainermn.datasets.create_empty_dataset(train)
            test = chainermn.datasets.create_empty_dataset(test)
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, data_comm, shuffle=True)
    test = chainermn.scatter_dataset(test, data_comm, shuffle=True)

    # shuffle=False keeps both pipeline stages iterating in the same order.
    train_iter = chainer.iterators.SerialIterator(
        train, args.batchsize, shuffle=False)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                               out=args.out)

    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, data_comm)
    trainer.extend(evaluator)

    # Some display and output extensions are necessary only for worker 0.
    if comm.rank == 0:
        trainer.extend(extensions.DumpGraph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy',
             'elapsed_time']))
        trainer.extend(extensions.ProgressBar())

    trainer.run()
def main():
    """Train Mask R-CNN on the ARC2017 instance-segmentation datasets
    (real or synthetic), optionally multi-node via ChainerMN.

    Hyper-parameters are scaled from the original Mask R-CNN COCO
    schedule by the number of GPUs (1 image per GPU).
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
    parser.add_argument('dataset', choices=['real', 'synthetic'],
                        help='The dataset.')
    parser.add_argument('--model', '-m',
                        choices=['vgg16', 'resnet50', 'resnet101'],
                        default='resnet50',
                        help='Base model of Mask R-CNN.')
    parser.add_argument('--pooling-func', '-pf',
                        choices=['pooling', 'align', 'resize'],
                        default='align', help='Pooling function.')
    parser.add_argument('--gpu', '-g', type=int, help='GPU id.')
    parser.add_argument('--multi-node', '-mn', action='store_true',
                        help='use multi node')
    parser.add_argument('--max-epoch', type=float,
                        help='Epoch (default: 12.17)')
    args = parser.parse_args()

    if args.multi_node:
        import chainermn
        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank
        args.n_node = comm.inter_size
        args.n_gpu = comm.size
        chainer.cuda.get_device_from_id(device).use()
    else:
        # NOTE(review): unlike the COCO variant of this script, there is no
        # explicit check that --gpu was given in single-node mode — a
        # missing --gpu fails inside get_device_from_id.  Confirm intended.
        args.n_node = 1
        args.n_gpu = 1
        chainer.cuda.get_device_from_id(args.gpu).use()
        device = args.gpu

    args.seed = 0
    now = datetime.datetime.now()
    args.timestamp = now.isoformat()
    args.out = osp.join(here, 'logs/train_mrcnn',
                        now.strftime('%Y%m%d_%H%M%S'))

    # Linear-scaling rule: 0.00125 * 8 = 0.01 in original
    args.batch_size = 1 * args.n_gpu
    args.lr = 0.00125 * args.batch_size
    args.weight_decay = 0.0001
    if args.max_epoch is None:
        # (180e3 * 8) / len(coco_trainval)
        args.max_epoch = (180e3 * 8) / 118287
    # lr / 10 at 120k iteration with
    # 160k iteration * 16 batchsize in original
    args.step_size = [(120e3 / 180e3) * args.max_epoch,
                      (160e3 / 180e3) * args.max_epoch]

    random.seed(args.seed)
    np.random.seed(args.seed)

    # Default Config
    min_size = 600
    max_size = 1000
    anchor_scales = [4, 8, 16, 32]
    proposal_creator_params = dict(
        n_train_pre_nms=12000,
        n_train_post_nms=2000,
        n_test_pre_nms=6000,
        n_test_post_nms=1000,
        min_size=0,
    )

    # if args.dataset == 'voc':
    #     train_data = mrcnn.datasets.SBDInstanceSeg('train')
    #     test_data = mrcnn.datasets.VOC2012InstanceSeg('val')
    # elif args.dataset == 'coco':
    #     train_data = chainer.datasets.ConcatenatedDataset(
    #         mrcnn.datasets.CocoInstanceSeg('train'),
    #         mrcnn.datasets.CocoInstanceSeg('valminusminival'),
    #     )
    #     test_data = mrcnn.datasets.CocoInstanceSeg('minival')
    #     train_data.class_names = test_data.class_names
    #     min_size = 800
    #     max_size = 1333
    # else:
    #     raise ValueError
    # instance_class_names = train_data.class_names[1:]
    # train_data = mrcnn.datasets.MaskRcnnDataset(train_data)
    # test_data = mrcnn.datasets.MaskRcnnDataset(test_data)

    if args.dataset == 'real':
        train_data = contrib.datasets.ARC2017RealInstancesDataset(
            'train', aug='standard')
    elif args.dataset == 'synthetic':
        train_data = contrib.datasets.ARC2017SyntheticInstancesDataset(
            do_aug=True, aug_level='all')
    else:
        raise ValueError
    test_data = contrib.datasets.ARC2017RealInstancesDataset('test')
    # class_names[0] is the background class; foreground classes only.
    instance_class_names = train_data.class_names[1:]
    train_data = MaskRcnnDataset(train_data)
    test_data = MaskRcnnDataset(test_data)

    # Select the RoI feature-extraction function.
    if args.pooling_func == 'align':
        pooling_func = mrcnn.functions.roi_align_2d
    elif args.pooling_func == 'pooling':
        pooling_func = chainer.functions.roi_pooling_2d
    elif args.pooling_func == 'resize':
        pooling_func = mrcnn.functions.crop_and_resize
    else:
        raise ValueError

    if args.model == 'vgg16':
        mask_rcnn = mrcnn.models.MaskRCNNVGG16(
            n_fg_class=len(instance_class_names),
            pretrained_model='imagenet',
            pooling_func=pooling_func,
            anchor_scales=anchor_scales,
            proposal_creator_params=proposal_creator_params,
            min_size=min_size,
            max_size=max_size)
    elif args.model in ['resnet50', 'resnet101']:
        n_layers = int(args.model.lstrip('resnet'))
        mask_rcnn = mrcnn.models.MaskRCNNResNet(
            n_layers=n_layers,
            n_fg_class=len(instance_class_names),
            pretrained_model='imagenet',
            pooling_func=pooling_func,
            anchor_scales=anchor_scales,
            proposal_creator_params=proposal_creator_params,
            min_size=min_size,
            max_size=max_size)
    else:
        raise ValueError
    mask_rcnn.use_preset('evaluate')
    model = mrcnn.models.MaskRCNNTrainChain(
        mask_rcnn,
        proposal_target_creator=mrcnn.utils.ProposalTargetCreator(
            n_sample=512),
    )
    if args.multi_node or args.gpu >= 0:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    if args.multi_node:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    optimizer.add_hook(
        chainer.optimizer.WeightDecay(rate=args.weight_decay))

    # Freeze early ResNet stages (standard Mask R-CNN fine-tuning recipe).
    if args.model in ['resnet50', 'resnet101']:
        model.mask_rcnn.extractor.mode = 'res3+'
        mask_rcnn.extractor.conv1.disable_update()
        mask_rcnn.extractor.bn1.disable_update()
        mask_rcnn.extractor.res2.disable_update()

    train_data = chainer.datasets.TransformDataset(
        train_data, mrcnn.datasets.MaskRCNNTransform(mask_rcnn))
    test_data = chainer.datasets.TransformDataset(
        test_data, mrcnn.datasets.MaskRCNNTransform(mask_rcnn, train=False))
    if args.multi_node:
        # Only rank 0 keeps the datasets; scatter splits them evenly.
        if comm.rank != 0:
            train_data = None
            test_data = None
        train_data = chainermn.scatter_dataset(train_data, comm,
                                               shuffle=True)
        test_data = chainermn.scatter_dataset(test_data, comm)

    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_prefetch=4, shared_mem=10**8)
    test_iter = chainer.iterators.MultiprocessIterator(
        test_data, batch_size=1, n_prefetch=4, shared_mem=10**8,
        repeat=False, shuffle=False)

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=device,
        converter=mrcnn.datasets.concat_examples)

    trainer = training.Trainer(updater, (args.max_epoch, 'epoch'),
                               out=args.out)

    # Step-wise lr decay at the two scheduled epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=training.triggers.ManualScheduleTrigger(
                       args.step_size, 'epoch'))

    eval_interval = 1, 'epoch'
    log_interval = 20, 'iteration'
    plot_interval = 0.1, 'epoch'
    print_interval = 20, 'iteration'

    evaluator = mrcnn.extensions.InstanceSegmentationVOCEvaluator(
        test_iter, model.mask_rcnn, device=device,
        use_07_metric=True, label_names=instance_class_names)
    if args.multi_node:
        evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=eval_interval)

    # Snapshotting, logging, plotting: single-rank only to avoid
    # duplicated output in the multi-node case.
    if not args.multi_node or comm.rank == 0:
        trainer.extend(extensions.snapshot_object(model.mask_rcnn,
                                                  'snapshot_model.npz'),
                       trigger=training.triggers.MaxValueTrigger(
                           'validation/main/map', eval_interval))
        args.git_hash = mrcnn.utils.git_hash()
        args.hostname = socket.gethostname()
        trainer.extend(fcn.extensions.ParamsReport(args.__dict__))
        trainer.extend(mrcnn.extensions.InstanceSegmentationVisReport(
            test_iter, model.mask_rcnn,
            label_names=instance_class_names),
            trigger=eval_interval)
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss', 'main/roi_loc_loss', 'main/roi_cls_loss',
            'main/roi_mask_loss', 'main/rpn_loc_loss',
            'main/rpn_cls_loss', 'validation/main/map'
        ]), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # plot
        assert extensions.PlotReport.available()
        trainer.extend(
            extensions.PlotReport([
                'main/loss', 'main/roi_loc_loss', 'main/roi_cls_loss',
                'main/roi_mask_loss', 'main/rpn_loc_loss',
                'main/rpn_cls_loss',
            ], file_name='loss.png', trigger=plot_interval),
            trigger=plot_interval,
        )
        trainer.extend(
            extensions.PlotReport(['validation/main/map'],
                                  file_name='accuracy.png',
                                  trigger=plot_interval),
            trigger=eval_interval,
        )

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """ChainerMN MNIST example entry point.

    Trains an MLP classifier on MNIST with data parallelism across MPI
    workers; supports a benchmark mode (fixed iteration count) and
    optional cProfile profiling of the training loop.
    """
    parser = argparse.ArgumentParser(description='ChainerMN example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--communicator', type=str,
                        default='hierarchical', help='Type of communicator')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--benchmark', action='store_true',
                        help='benchmark mode')
    parser.add_argument('--benchmark-iteration', type=int, default=500,
                        help='the number of iterations when using benchmark mode')
    parser.add_argument('--cprofile', action='store_true',
                        help='cprofile')
    args = parser.parse_args()

    # Switch to 'forkserver' and fork one dummy process up-front so that
    # MultiprocessIterator workers are not created by fork() after MPI/CUDA
    # initialization (which can crash — see ChainerMN tips & FAQs).
    multiprocessing.set_start_method('forkserver')
    p = multiprocessing.Process(target=dummy_func, args=())
    p.start()
    p.join()

    # Prepare ChainerMN communicator.
    if args.gpu:
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
        device = comm.intra_rank  # one GPU per local rank
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1  # CPU

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = L.Classifier(MLP(args.unit, 10))
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob,
        n_prefetch=args.loaderjob)
    test_iter = chainer.iterators.MultiprocessIterator(
        test, args.batchsize, repeat=False, n_processes=args.loaderjob,
        n_prefetch=args.loaderjob)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    # Benchmark mode stops after a fixed iteration count instead of epochs.
    if args.benchmark:
        stop_trigger = (args.benchmark_iteration, 'iteration')
    else:
        stop_trigger = (args.epoch, 'epoch')
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Create a multi node evaluator from a standard Chainer evaluator.
    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        if args.benchmark:
            # In benchmark mode, log only once when the run stops.
            trainer.extend(extensions.LogReport(), trigger=stop_trigger)
        else:
            trainer.extend(extensions.dump_graph('main/loss'))
            trainer.extend(extensions.LogReport())
            trainer.extend(extensions.PrintReport(
                ['epoch', 'main/loss', 'validation/main/loss',
                 'main/accuracy', 'validation/main/accuracy',
                 'elapsed_time']))
            trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # Optionally profile the whole training run with cProfile.
    if args.cprofile:
        pr = cProfile.Profile()
        pr.enable()
    trainer.run()
    if args.cprofile:
        pr.disable()
        s = io.StringIO()
        sort_by = 'tottime'
        ps = pstats.Stats(pr, stream=s).sort_stats(sort_by)
        ps.print_stats()
        # Print only on rank 0, but dump a per-rank profile for offline use.
        if comm.rank == 0:
            print(s.getvalue())
        pr.dump_stats('{0}/rank_{1}.cprofile'.format(args.out, comm.rank))
def main():
    """ChainerMN seq2seq (En->Fr) example entry point.

    Rank 0 reads (optionally caching) the corpora, builds vocabularies and
    filters sentence pairs; the id->word tables are then broadcast and the
    train/test pairs scattered so every worker trains on its own shard,
    with BLEU computed by a multi-node evaluator.
    """
    parser = argparse.ArgumentParser(description='Chainer example: seq2seq')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--bleu', action='store_true', default=False,
                        help='Report BLEU score')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--cache', '-c', default=None,
                        help='Directory to cache pre-processed dataset')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1024,
                        help='Number of units')
    parser.add_argument('--communicator', default='hierarchical',
                        help='Type of communicator')
    parser.add_argument('--stop', '-s', type=str, default='15e',
                        help='Stop trigger (ex. "500i", "15e")')
    parser.add_argument('--input', '-i', type=str, default='wmt',
                        help='Input directory')
    parser.add_argument('--optimizer', type=str, default='adam()',
                        help='Optimizer and its argument')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    args = parser.parse_args()

    # Prepare ChainerMN communicator
    # NOTE(review): args.communicator is parsed and printed below, but a
    # fixed 'hierarchical' communicator is created here — confirm intended.
    if args.gpu:
        comm = chainermn.create_communicator('hierarchical')
        dev = comm.intra_rank  # one GPU per local rank
    else:
        comm = chainermn.create_communicator('naive')
        dev = -1  # CPU

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('==========================================')

    # Rank 0 prepares all data
    if comm.rank == 0:
        if args.cache and not os.path.exists(args.cache):
            os.mkdir(args.cache)

        # Read source data (optionally via the pickle cache).
        bt = time.time()
        if args.cache:
            cache_file = os.path.join(args.cache, 'source.pickle')
            source_vocab, source_data = cached_call(cache_file,
                                                    read_source,
                                                    args.input, args.cache)
        else:
            source_vocab, source_data = read_source(args.input, args.cache)
        et = time.time()
        print('RD source done. {:.3f} [s]'.format(et - bt))
        sys.stdout.flush()

        # Read target data
        bt = time.time()
        if args.cache:
            cache_file = os.path.join(args.cache, 'target.pickle')
            target_vocab, target_data = cached_call(cache_file,
                                                    read_target,
                                                    args.input, args.cache)
        else:
            target_vocab, target_data = read_target(args.input, args.cache)
        et = time.time()
        print('RD target done. {:.3f} [s]'.format(et - bt))
        sys.stdout.flush()

        print('Original training data size: %d' % len(source_data))
        # Keep only pairs where both sides have 1..49 tokens.
        train_data = [(s, t)
                      for s, t in six.moves.zip(source_data, target_data)
                      if 0 < len(s) < 50 and 0 < len(t) < 50]
        print('Filtered training data size: %d' % len(train_data))

        # Dev set (newstest2013) becomes the test data, non-empty pairs only.
        en_path = os.path.join(args.input, 'dev', 'newstest2013.en')
        source_data = europal.make_dataset(en_path, source_vocab)
        fr_path = os.path.join(args.input, 'dev', 'newstest2013.fr')
        target_data = europal.make_dataset(fr_path, target_vocab)
        assert(len(source_data) == len(target_data))
        test_data = [(s, t) for s, t
                     in six.moves.zip(source_data, target_data)
                     if 0 < len(s) and 0 < len(t)]

        source_ids = {word: index
                      for index, word in enumerate(source_vocab)}
        target_ids = {word: index
                      for index, word in enumerate(target_vocab)}
    else:
        # target_data, source_data = None, None
        train_data, test_data = None, None
        target_ids, source_ids = None, None

    # Print GPU id (one rank at a time, separated by barriers).
    for i in range(0, comm.size):
        if comm.rank == i:
            print('Rank {} GPU: {}'.format(comm.rank, dev))
        sys.stdout.flush()
        comm.mpi_comm.Barrier()

    # broadcast id- > word dictionary
    source_ids = comm.bcast_obj(source_ids, root=0)
    target_ids = comm.bcast_obj(target_ids, root=0)

    # Inverted tables for decoding ids back to words.
    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    if comm.rank == 0:
        print('target_words : {}'.format(len(target_words)))
        print('source_words : {}'.format(len(source_words)))

    model = Seq2seq(3, len(source_ids), len(target_ids), args.unit)

    if dev >= 0:
        chainer.cuda.get_device_from_id(dev).use()
        model.to_gpu(dev)

    # determine the stop trigger: "<N>e" = epochs, "<N>i" = iterations
    m = re.match(r'^(\d+)e$', args.stop)
    if m:
        trigger = (int(m.group(1)), 'epoch')
    else:
        m = re.match(r'^(\d+)i$', args.stop)
        if m:
            trigger = (int(m.group(1)), 'iteration')
        else:
            if comm.rank == 0:
                sys.stderr.write('Error: unknown stop trigger: {}'.format(
                    args.stop))
            exit(-1)

    if comm.rank == 0:
        print('Trigger: {}'.format(trigger))

    optimizer = chainermn.create_multi_node_optimizer(
        create_optimizer(args.optimizer), comm)
    optimizer.setup(model)

    # Broadcast dataset
    # Sanity check of train_data
    train_data = chainermn.scatter_dataset(train_data, comm)
    test_data = chainermn.scatter_dataset(test_data, comm)

    train_iter = chainer.iterators.SerialIterator(train_data,
                                                  args.batchsize,
                                                  shuffle=False)
    updater = training.StandardUpdater(
        train_iter, optimizer, converter=convert, device=dev)
    trainer = training.Trainer(updater, trigger, out=args.out)

    trainer.extend(chainermn.create_multi_node_evaluator(
        BleuEvaluator(model, test_data, device=dev, comm=comm),
        comm))

    def translate_one(source, target):
        # Translate one raw sentence and print source/result/expected.
        # Unknown words map to id 1 (presumably <unk> — confirm vocab).
        words = europal.split_sentence(source)
        print('# source : ' + ' '.join(words))
        x = model.xp.array(
            [source_ids.get(w, 1) for w in words], numpy.int32)
        ys = model.translate([x])[0]
        words = [target_words[y] for y in ys]
        print('# result : ' + ' '.join(words))
        print('# expect : ' + target)

    # NOTE(review): this extension is defined but never registered (the
    # make_extension decorator is commented out), so it does not run.
    # @chainer.training.make_extension(trigger=(200, 'iteration'))
    def translate(trainer):
        translate_one(
            'Who are we ?',
            'Qui sommes-nous?')
        translate_one(
            'And it often costs over a hundred dollars ' +
            'to obtain the required identity card .',
            'Or, il en coûte souvent plus de cent dollars ' +
            'pour obtenir la carte d\'identité requise.')

        source, target = test_data[numpy.random.choice(len(test_data))]
        source = ' '.join([source_words.get(i, '') for i in source])
        target = ' '.join([target_words.get(i, '') for i in target])
        translate_one(source, target)

    # Console reporting only on rank 0 to avoid duplicated output.
    if comm.rank == 0:
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')),
                       trigger=(1, 'epoch'))
        report = extensions.PrintReport(['epoch',
                                         'iteration',
                                         'main/loss',
                                         'main/perp',
                                         'validation/main/bleu',
                                         'elapsed_time'])
        trainer.extend(report, trigger=(1, 'epoch'))

    comm.mpi_comm.Barrier()
    if comm.rank == 0:
        print('start training')
        sys.stdout.flush()

    trainer.run()
def main():
    """Distributed Mask R-CNN FPN (ResNet50) training on YCB-Video
    instance segmentation (morefusion), one GPU per MPI local rank."""
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("--batchsize", type=int, default=16, help="batch size")
    parser.add_argument("--out", default="logs", help="logs")
    parser.add_argument("--resume", help="resume")
    args = parser.parse_args()

    # Use 'forkserver' and fork one dummy process before MPI/CUDA touch
    # this process, so MultiprocessIterator workers start safely.
    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, "set_start_method"):
        multiprocessing.set_start_method("forkserver")
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator("pure_nccl")
    device = comm.intra_rank  # one GPU per local rank

    class_names = morefusion.datasets.ycb_video.class_names
    fg_class_names = class_names[1:]  # index 0 is background
    model = MaskRCNNFPNResNet50(n_fg_class=len(fg_class_names),
                                pretrained_model="imagenet")
    # Overwrite compatible parameters with COCO-pretrained weights.
    model_coco = MaskRCNNFPNResNet50(pretrained_model="coco")
    _copyparams(model, model_coco)

    model.use_preset("evaluate")
    train_chain = TrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    train_chain.to_gpu()

    # Only rank 0 builds the datasets; scatter_dataset shards them out.
    if comm.rank == 0:
        train = chainer.datasets.ConcatenatedDataset(
            morefusion.datasets.YCBVideoInstanceSegmentationDataset(
                split="train", sampling=15),
            morefusion.datasets.YCBVideoSyntheticInstanceSegmentationDataset(
                bg_composite=True),
            morefusion.datasets.MySyntheticYCB20190916InstanceSegmentationDataset(  # NOQA
                "train", bg_composite=True),
        )
        train = transform_dataset(train, model, train=True)
        val = morefusion.datasets.YCBVideoInstanceSegmentationDataset(
            split="keyframe", sampling=1)
        val = transform_dataset(val, model, train=False)
    else:
        train = None
        val = None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    val = chainermn.scatter_dataset(val, comm, shuffle=False)

    # Per-process batch size = global batch size / number of workers.
    train_iter = chainer.iterators.MultiprocessIterator(
        train,
        args.batchsize // comm.size,
        n_processes=args.batchsize // comm.size,
        shared_mem=100 * 1000 * 1000 * 4,
    )
    val_iter = chainer.iterators.MultiprocessIterator(
        val,
        args.batchsize // comm.size,
        n_processes=args.batchsize // comm.size,
        shared_mem=100 * 1000 * 1000 * 4,
        shuffle=False,
        repeat=False,
    )

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    optimizer.add_hook(WeightDecay(0.0001))

    # Freeze all batch-norm layers plus the backbone and RPN; only the
    # heads are trained. The namedlinks loop prints which links update.
    for link in model.links():
        if isinstance(link, L.BatchNormalization):
            link.disable_update()
    model.extractor.disable_update()
    model.rpn.disable_update()
    for name, link in model.namedlinks():
        print(name, link.update_enabled)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)

    # 180k iterations at global batch 8, converted to epochs over the
    # 118287-image schedule.
    max_epoch = (180e3 * 8) / 118287
    trainer = training.Trainer(updater, (max_epoch, "epoch"), args.out)

    @make_shift("lr")
    def lr_schedule(trainer):
        # Linear warm-up over the first 500 iterations, then step decay
        # (x0.1) at the 120k/180k and 160k/180k points of the schedule.
        base_lr = 0.02 * args.batchsize / 16
        warm_up_duration = 500
        warm_up_rate = 1 / 3

        iteration = trainer.updater.iteration
        if iteration < warm_up_duration:
            rate = (warm_up_rate +
                    (1 - warm_up_rate) * iteration / warm_up_duration)
        else:
            rate = 1
        for step in [120e3 / 180e3 * max_epoch, 160e3 / 180e3 * max_epoch]:
            if trainer.updater.epoch_detail >= step:
                rate *= 0.1

        return base_lr * rate

    trainer.extend(lr_schedule)

    val_interval = 10000, "iteration"
    evaluator = InstanceSegmentationCOCOEvaluator(val_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)

    # Reporting and snapshots only on rank 0 to avoid duplicates.
    if comm.rank == 0:
        log_interval = 10, "iteration"
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        keys = [
            "epoch",
            "iteration",
            "lr",
            "main/loss",
            "main/loss/rpn/loc",
            "main/loss/rpn/conf",
            "main/loss/bbox_head/loc",
            "main/loss/bbox_head/conf",
            "main/loss/mask_head",
            "validation/main/map/iou=0.50:0.95/area=all/max_dets=100",
        ]
        trainer.extend(extensions.PrintReport(keys), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        # Keep the best model by validation mAP, plus the final model.
        trainer.extend(
            extensions.snapshot_object(model, "model_iter_best"),
            trigger=training.triggers.MaxValueTrigger(
                "validation/main/map/iou=0.50:0.95/area=all/max_dets=100",
                trigger=val_interval,
            ),
        )
        trainer.extend(
            extensions.snapshot_object(model,
                                       "model_iter_{.updater.iteration}"),
            trigger=(max_epoch, "epoch"),
        )

    if args.resume:
        serializers.load_npz(args.resume, trainer, strict=False)

    trainer.run()
def check_mnist(gpu, display_log=True): epoch = 5 batchsize = 100 n_units = 100 comm = chainermn.create_communicator('naive') if gpu: device = comm.intra_rank chainer.cuda.get_device(device).use() else: device = -1 model = L.Classifier(MLP(n_units, 10)) if gpu: model.to_gpu() optimizer = chainermn.create_multi_node_optimizer( chainer.optimizers.Adam(), comm) optimizer.setup(model) if comm.rank == 0: train, test = chainer.datasets.get_mnist() else: train, test = None, None train = chainermn.scatter_dataset(train, comm, shuffle=True) test = chainermn.scatter_dataset(test, comm, shuffle=True) train_iter = chainer.iterators.SerialIterator(train, batchsize) test_iter = chainer.iterators.SerialIterator(test, batchsize, repeat=False, shuffle=False) updater = training.StandardUpdater(train_iter, optimizer, device=device) trainer = training.Trainer(updater, (epoch, 'epoch')) # Wrap standard Chainer evaluators by MultiNodeEvaluator. evaluator = extensions.Evaluator(test_iter, model, device=device) evaluator = chainermn.create_multi_node_evaluator(evaluator, comm) trainer.extend(evaluator) # Add checkpointer. This is just to check checkpointing runs # without errors path = tempfile.mkdtemp(dir='/tmp', prefix=__name__ + "-tmp-") checkpointer = create_multi_node_checkpointer(name=__name__, comm=comm, path=path) trainer.extend(checkpointer, trigger=(1, 'epoch')) # Some display and output extensions are necessary only for one worker. # (Otherwise, there would just be repeated outputs.) if comm.rank == 0 and display_log: trainer.extend(extensions.LogReport(trigger=(1, 'epoch')), trigger=(1, 'epoch')) trainer.extend(extensions.PrintReport([ 'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time' ], out=sys.stderr), trigger=(1, 'epoch')) trainer.run() err = evaluator()['validation/main/accuracy'] assert err > 0.95 # Check checkpointer successfully finalized snapshot directory assert [] == os.listdir(path) os.removedirs(path)
def main():
    """Train the audio-visual separation network with ChainerMN.

    Builds index-based train/test splits over paired spectrogram (*.npz)
    and visual data directories on rank 0, scatters them to all MPI
    workers, runs distributed training, and saves the model/optimizer on
    rank 0 at the end.
    """
    # ===== Argparse ===== #
    parser = argparse.ArgumentParser()
    parser.add_argument("--communicator", type=str,
                        default="hierarchical", help="Type of communicator")
    parser.add_argument("--gpu", "-g", action="store_true", help="Use GPU")
    parser.add_argument("--batch_size", "-b", type=int, default=4,
                        help="batch size")
    parser.add_argument("--iteration", "-i", type=int, default=1000,
                        help="# of epochs")
    parser.add_argument("--units", "-u", type=int, default=5000,
                        help="# of FC units")
    parser.add_argument("--resume", "-r", default="",
                        help="Resume the training from snapshot")
    parser.add_argument("--data_visual", type=str, default=DATA_DIR_VISUAL,
                        help="Visual data directory, which has csv files")
    parser.add_argument("--data_speech", type=str, default=DATA_DIR_SPEC,
                        help="Spectrogram data directory, which has npz files")
    parser.add_argument("--result_dir", type=str, default="result",
                        help="Save directory")
    args = parser.parse_args()

    # ===== GPU or CPU ===== #
    # (The unused ``xp = cuda.cupy`` local was removed.)
    if args.gpu:
        comm = chainermn.create_communicator(args.communicator)
        device = comm.intra_rank  # one GPU per local rank
    else:
        comm = chainermn.create_communicator("naive")
        device = -1

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num Minibatch-size: {}'.format(args.batch_size))
        print('Num iteration: {}'.format(args.iteration))
        print('==========================================')

    # ===== Load model ===== #
    if comm.rank == 0:
        print("loading model...")
    model = Audio_Visual_Net(gpu=0, num_fusion_units=args.units)
    if device >= 0:
        cuda.get_device_from_id(device).use()
        model.to_gpu()
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # ===== Set data ===== #
    # Only rank 0 enumerates the files; scatter_dataset shards the indices.
    if comm.rank == 0:
        print("loading data...")
        spec_input = sorted(glob.glob(os.path.join(args.data_speech,
                                                   "*.npz")))
        vis_input = sorted(glob.glob(os.path.join(args.data_visual, "*")))
        assert len(spec_input) == len(
            vis_input), "# of files are different between faces and audios."
        # BUG FIX: ``range`` objects are immutable in Python 3, so the old
        # ``range(...).remove(5151)`` raised AttributeError; materialize a
        # list of indices first.
        all_nums = list(range(len(spec_input)))
        # Sample #5151 is excluded (presumably a known-bad pair — confirm).
        all_nums.remove(5151)
        # BUG FIX: the old code called random.sample() and discarded its
        # return value, so the train/test split was never randomized.
        # Shuffle in place instead.
        random.shuffle(all_nums)
        threshold = int(len(all_nums) * 0.995)  # 99.5% train / 0.5% test
        all_nums_train = all_nums[:threshold]
        all_nums_test = all_nums[threshold:]
        train = list(all_nums_train)
        test = list(all_nums_test)
    else:
        train = None
        test = None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)
    train_iter = chainer.iterators.SerialIterator(dataset=train,
                                                  batch_size=args.batch_size,
                                                  shuffle=False,
                                                  repeat=True)
    test_iter = chainer.iterators.SerialIterator(dataset=test,
                                                 batch_size=args.batch_size,
                                                 shuffle=False,
                                                 repeat=False)

    # ===== Define trainer ===== #
    if comm.rank == 0:
        print("setting trainer...")
    updater = chainer.training.StandardUpdater(train_iter, optimizer,
                                               device=device)
    trainer = chainer.training.Trainer(updater,
                                       (args.iteration, "iteration"),
                                       out=args.result_dir)
    iter_trigger = 10
    evaluator = TestModeEvaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=(int(iter_trigger), "iteration"))
    # Reporting/plotting/snapshot only on rank 0 to avoid duplicates.
    if comm.rank == 0:
        trainer.extend(extensions.LogReport(trigger=(iter_trigger,
                                                     "iteration")),
                       trigger=(iter_trigger, "iteration"))
        trainer.extend(
            extensions.ProgressBar(update_interval=int(iter_trigger / 10)))
        trainer.extend(
            extensions.PlotReport(["main/loss", "validation/main/loss"],
                                  "iteration",
                                  file_name="loss.png",
                                  trigger=(iter_trigger, "iteration")))
        trainer.extend(extensions.PrintReport([
            "epoch",
            "iteration",
            "main/loss",
            "validation/main/loss",
            "elapsed_time"
        ]), trigger=(iter_trigger, "iteration"))
        trainer.extend(extensions.snapshot(),
                       trigger=(int(iter_trigger * 10), "iteration"))
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # ===== Training ===== #
    if comm.rank == 0:
        print("start training...")
    trainer.run()

    # ===== Save model ===== #
    if comm.rank == 0:
        print("saving model...")
        model.to_cpu()
        chainer.serializers.save_npz(os.path.join(args.result_dir, "model"),
                                     model)
        chainer.serializers.save_npz(
            os.path.join(args.result_dir, "optimizer"), optimizer)
    if comm.rank == 0:
        print("done!!")
def main():
    """ChainerMN ImageNet (ILSVRC2012) training example entry point.

    GPU-only: each MPI process drives one GPU (``comm.intra_rank``), with
    automatic multi-node checkpointing and a multi-node evaluator.
    """
    # Check if GPU is available
    # (ImageNet example does not support CPU execution)
    if not chainer.cuda.available:
        raise RuntimeError("ImageNet requires GPU support.")

    # Supported convnet architectures, selected via --arch.
    archs = {
        'alex': alex.Alex,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.add_argument('--communicator', default='hierarchical')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank  # one GPU per process on each node

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        print('Using {} communicator'.format(args.communicator))
        print('Using {} arch'.format(args.arch))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    chainer.cuda.get_device_from_id(device).use()  # Make the GPU current
    model.to_gpu()

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    mean = np.load(args.mean)
    if comm.rank == 0:
        train = PreprocessedDataset(args.train, args.root, mean, model.insize)
        val = PreprocessedDataset(
            args.val, args.root, mean, model.insize, False)
    else:
        train = None
        val = None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    val = chainermn.scatter_dataset(val, comm)

    # We need to change the start method of multiprocessing module if we are
    # using InfiniBand and MultiprocessIterator. This is because processes
    # often crash when calling fork if they are using Infiniband.
    # (c.f., https://www.open-mpi.org/faq/?category=tuning#fork-warning )
    multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9), comm)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # In --test mode, fire everything every 10 iterations for a quick run.
    checkpoint_interval = (10, 'iteration') if args.test else (1, 'epoch')
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    log_interval = (10, 'iteration') if args.test else (1, 'epoch')

    # Resume automatically from the most recent multi-node checkpoint.
    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    # Create a multi node evaluator from an evaluator.
    evaluator = TestModeEvaluator(val_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'lr'
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def objective(trial, comm):
    """Optuna objective for distributed MNIST hyperparameter search.

    Trains a trial-sampled MLP with a multi-node optimizer, prunes
    unpromising trials via ``ChainerPruningExtension``, and returns the
    final validation error to be minimized.

    Args:
        trial: ``optuna.trial.Trial`` used to sample the architecture.
        comm: ChainerMN communicator shared by all workers.

    Returns:
        float: ``1.0 - accuracy`` measured on this worker's test shard.
    """
    # Sample an architecture.
    model = L.Classifier(create_model(trial))

    # Setup optimizer.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)

    # Setup dataset and iterator. Only worker 0 loads the whole dataset —
    # scatter_dataset only uses rank 0's copy, so loading and permuting
    # MNIST on every rank (as before) was redundant work. This also matches
    # the rank-0-only convention used by the other examples in this file.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
        rng = np.random.RandomState(0)
        train = chainer.datasets.SubDataset(
            train, 0, N_TRAIN_EXAMPLES, order=rng.permutation(len(train)))
        test = chainer.datasets.SubDataset(
            test, 0, N_TEST_EXAMPLES, order=rng.permutation(len(test)))
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm)

    train_iter = chainer.iterators.SerialIterator(train, BATCHSIZE,
                                                  shuffle=True)
    test_iter = chainer.iterators.SerialIterator(test, BATCHSIZE,
                                                 repeat=False, shuffle=False)

    # Setup trainer.
    updater = chainer.training.StandardUpdater(train_iter, optimizer)
    trainer = chainer.training.Trainer(updater, (EPOCH, 'epoch'))

    # Add Chainer extension for pruners.
    trainer.extend(
        optuna.integration.ChainerPruningExtension(
            trial, 'validation/main/loss', (PRUNER_INTERVAL, 'epoch')))
    evaluator = chainer.training.extensions.Evaluator(test_iter, model)
    trainer.extend(chainermn.create_multi_node_evaluator(evaluator, comm))
    log_report_extension = chainer.training.extensions.LogReport(log_name=None)
    trainer.extend(log_report_extension)
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.ProgressBar())

    # Run training.
    # Please set show_loop_exception_msg False to inhibit messages about
    # TrialPruned exception. ChainerPruningExtension raises TrialPruned
    # exception to stop training, and trainer shows some messages every
    # time it receive TrialPruned.
    trainer.run(show_loop_exception_msg=False)

    # Evaluate.
    evaluator = chainer.training.extensions.Evaluator(test_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    report = evaluator()

    # The following line mitigates the memory problem in CircleCI
    # (see https://github.com/pfnet/optuna/pull/325 for more details).
    gc.collect()

    return 1.0 - report['main/accuracy']
def main():
    """ChainerMN MNIST example with automatic checkpoint/restart.

    Identical to the plain MNIST example except that a multi-node
    checkpointer periodically snapshots the trainer under ``--run-id`` and
    transparently resumes from the latest checkpoint on restart.
    """
    parser = argparse.ArgumentParser(description='''\
ChainerMN example: MNIST with automatic checkpoints enabled''')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--communicator', type=str,
                        default='hierarchical', help='Type of communicator')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    parser.add_argument('--run-id', type=str, default='train-mnist-example',
                        help='ID of the task name')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    if args.gpu:
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
        device = comm.intra_rank  # one GPU per local rank
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1  # CPU

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = L.Classifier(MLP(args.unit, 10))
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Enable checkpointer and recover from checkpoint if any checkpoint exists
    checkpointer = create_multi_node_checkpointer(name=args.run_id, comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    print("Rank", comm.rank, ": (Re)Starting from (epoch, iter) =",
          (trainer.updater.epoch, trainer.updater.iteration))
    trainer.extend(checkpointer, trigger=(1000, 'iteration'))

    # Create a multi node evaluator from a standard Chainer evaluator.
    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.DumpGraph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
                'validation/main/accuracy', 'elapsed_time'
            ]))
        trainer.extend(extensions.ProgressBar())

    trainer.run()
def main() -> None:
    """Model-parallel (pipelined) MNIST training with ChainerMN.

    Processes form a 2-D grid: ``data_axis`` (= rank % 2) selects the
    pipeline stage a process runs (MLP0 or MLP1) and ``model_axis``
    (= rank // 2) selects its data-parallel replica.  ``comm.split`` by
    ``data_axis`` yields ``data_comm`` (same-stage processes, used for
    data parallelism); splitting by ``model_axis`` yields ``model_comm``
    (the two stages of one replica, used inside MLP0/MLP1).
    """
    parser = argparse.ArgumentParser(
        description='ChainerMN example: pipelined neural network')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    if args.gpu:
        comm = chainermn.create_communicator('pure_nccl')
        device = comm.intra_rank  # one GPU per process
    else:
        comm = chainermn.create_communicator('naive')
        device = -1  # CPU
    # The axis computation and splits were previously duplicated in both
    # branches above; they are identical, so do them once here.  Note that
    # comm.split is a collective call and must run on every rank.
    data_axis, model_axis = comm.rank % 2, comm.rank // 2
    data_comm = comm.split(data_axis, comm.rank)
    model_comm = comm.split(model_axis, comm.rank)

    if model_comm.size != 2:
        # Each replica needs exactly one process per pipeline stage.
        # (Fixed: the original message concatenated to "numberof".)
        raise ValueError('This example can only be executed on the even '
                         'number of processes.')

    if comm.rank == 0:
        print('==========================================')
        if args.gpu:
            print('Using GPUs')
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    if data_axis == 0:
        # First pipeline stage: input-side half of the network.
        model = L.Classifier(MLP0(model_comm, args.unit))
    elif data_axis == 1:
        # Second pipeline stage: output-side half.
        model = MLP1(model_comm, args.unit, 10)

    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), data_comm)
    optimizer.setup(model)

    # Original dataset on worker 0 and 1.
    # Datasets of worker 0 and 1 are split and distributed to all workers.
    if model_axis == 0:
        train, test = chainer.datasets.get_mnist()
        if data_axis == 1:
            # Second-stage processes receive activations from the first
            # stage, so they scatter an empty dataset (same length, no data)
            # just to drive their iterators.
            train = chainermn.datasets.create_empty_dataset(train)
            test = chainermn.datasets.create_empty_dataset(test)
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, data_comm, shuffle=True)
    test = chainermn.scatter_dataset(test, data_comm, shuffle=True)

    # shuffle=False keeps the two stages of a replica in lockstep over
    # the same example order.
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize,
                                                  shuffle=False)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, data_comm)
    trainer.extend(evaluator)

    # Some display and output extensions are necessary only for worker 0.
    if comm.rank == 0:
        trainer.extend(extensions.DumpGraph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss',
                'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
            ]))
        trainer.extend(extensions.ProgressBar())

    trainer.run()
def main() -> None:
    """Train an MNIST classifier with the K-FAC (or Adam) optimizer,
    optionally distributed over multiple processes.

    With ``--distributed``, every process runs this function and the
    communicator-backed calls (scatter, optimizer, evaluator) must run in
    the same order on all ranks.
    """
    parser = argparse.ArgumentParser(description='Chainer K-FAC example: MNIST')  # NOQA
    parser.add_argument('--batch_size', type=int, default=100)
    parser.add_argument('--num_epochs', type=int, default=20)
    # -1 means "snapshot once, at the end of training" (see below).
    parser.add_argument('--snapshot_interval', type=int, default=-1)
    parser.add_argument('--no_cuda', action='store_true')
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume', default='')
    parser.add_argument('--optimizer', default='kfac')
    parser.add_argument('--arch', choices=['mlp', 'cnn'], default='mlp')
    parser.add_argument('--plot', action='store_true')
    parser.add_argument('--distributed', action='store_true')
    args = parser.parse_args()

    # Prepare communicator
    if not args.distributed:
        # Single process execution
        comm = None
        rank = 0
        device = -1 if args.no_cuda else 0
    else:
        # Multiple processes execution, constructs a communicator.
        # chainerkfac uses different method to create a communicator from
        # chainermn.
        if args.optimizer == 'kfac':
            comm = chainerkfac.create_communicator('pure_nccl')
        else:
            comm = chainermn.create_communicator('pure_nccl')
        rank = comm.rank
        device = comm.intra_rank
        if rank == 0:
            print('======== DISTRIBUTED TRAINING ========')

    # Set up a neural network to train
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.arch == 'mlp':
        model = L.Classifier(MLP())
        in_ndim = 1  # input dimensions (flat vector for the MLP)
    else:
        model = L.Classifier(CNN())
        in_ndim = 3  # input dimensions (C x H x W image for the CNN)

    if device >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(device).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    if args.optimizer == 'kfac':
        if comm is None:
            optimizer = chainerkfac.optimizers.KFAC()
        else:
            optimizer = chainerkfac.optimizers.DistributedKFAC(comm)
    else:
        optimizer = chainer.optimizers.Adam()
        if comm is not None:
            optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)

    # Load the MNIST dataset.  In distributed mode only rank 0 loads it and
    # the scatter distributes even shards to all ranks.
    if rank == 0:
        train, test = chainer.datasets.get_mnist(ndim=in_ndim)
    else:
        train, test = None, None
    if comm is not None:
        train = chainermn.scatter_dataset(train, comm, shuffle=True)
        test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, args.batch_size)
    test_iter = chainer.iterators.SerialIterator(test, args.batch_size,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.num_epochs, 'epoch'),
                               out=args.out)

    # Evaluate the model with the test dataset for each epoch
    evaluator = extensions.Evaluator(test_iter, model, device=device)
    if comm is not None:
        evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if rank == 0:
        # Take a snapshot for each specified epoch
        snapshot_interval = args.num_epochs \
            if args.snapshot_interval == -1 else max(1, args.snapshot_interval)
        trainer.extend(extensions.snapshot(),
                       trigger=(snapshot_interval, 'epoch'))

        # Write a log of evaluation statistics for each epoch
        trainer.extend(extensions.LogReport())

        # Save two plot images to the result dir
        if args.plot and extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                      'epoch', file_name='loss.png'))
            trainer.extend(
                extensions.PlotReport(
                    ['main/accuracy', 'validation/main/accuracy'],
                    'epoch', file_name='accuracy.png'))

        # Print selected entries of the log to stdout
        # Here "main" refers to the target link of the "main" optimizer again,
        # and "validation" refers to the default name of the Evaluator
        # extension. Entries other than 'epoch' are reported by the Classifier
        # link, called by either the updater or the evaluator.
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

        # Print a progress bar to stdout
        trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main() -> None:
    """Data-parallel MNIST training with ChainerMN.

    Every MPI process runs this function; the collective calls
    (communicator creation, ``scatter_dataset``, the multi-node optimizer
    and evaluator) must execute in the same order on all ranks.
    """
    parser = argparse.ArgumentParser(description='ChainerMN example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--communicator', type=str,
                        default='hierarchical', help='Type of communicator')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    if args.gpu:
        if args.communicator == 'naive':
            # 'naive' is CPU-only; bail out before any collective call.
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
        # One GPU per process: intra_rank is this process's index within
        # its node, used as the CUDA device id.
        device = comm.intra_rank
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1  # -1 means CPU in Chainer

    if comm.rank == 0:
        # Only worker 0 prints the configuration to avoid repeated output.
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = L.Classifier(MLP(args.unit, 10))
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers
    # (non-root ranks pass None and receive their shard from the scatter).
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Create a multi node evaluator from a standard Chainer evaluator.
    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
        trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Restore trainer state (including optimizer and model) from a
        # previously saved snapshot before running.
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()