def schedule_optimizer_value(epoch_list, value_list, optimizer_name="main", attr_name="lr"):
    """Build a trainer extension that sets an optimizer hyperparameter on a schedule.

    Example usage:
        trainer.extend(schedule_optimizer_value([2, 4, 7], [0.008, 0.006, 0.002]))
    """
    if isinstance(epoch_list, list):
        # One scheduled value per epoch point.
        assert len(epoch_list) == len(value_list)
    else:
        # Scalars are promoted to one-element schedules.
        assert isinstance(epoch_list, (float, int))
        assert isinstance(value_list, (float, int))
        epoch_list = [epoch_list]
        value_list = [value_list]

    trigger = triggers.ManualScheduleTrigger(epoch_list, "epoch")
    count = 0

    @chainer.training.extension.make_extension(trigger=trigger)
    def set_value(trainer: Trainer):
        # Each firing of the trigger consumes the next scheduled value in order.
        nonlocal count
        optimizer = trainer.updater.get_optimizer(optimizer_name)
        setattr(optimizer, attr_name, value_list[count])
        count += 1

    return set_value
def trainer_extend(trainer, net, evaluator, small_lr_layers, lr_decay_rate,
                   lr_decay_epoch, epoch_or_iter, save_trainer_interval):
    """Attach LR scheduling, reporting, plotting, and snapshot extensions to `trainer`."""

    def slow_drop_lr(trainer):
        # Decay the per-parameter LR of the layers trained with a smaller LR.
        if small_lr_layers:
            for layer_name in small_lr_layers:
                layer = getattr(net.predictor, layer_name)
                layer.W.update_rule.hyperparam.lr *= lr_decay_rate
                layer.b.update_rule.hyperparam.lr *= lr_decay_rate

    # Learning rate
    # Two separate trigger instances on purpose: triggers are stateful objects.
    trainer.extend(slow_drop_lr,
                   trigger=triggers.ManualScheduleTrigger(
                       lr_decay_epoch, epoch_or_iter))
    trainer.extend(extensions.ExponentialShift('lr', lr_decay_rate),
                   trigger=triggers.ManualScheduleTrigger(
                       lr_decay_epoch, epoch_or_iter))

    # Observe training
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, epoch_or_iter))
    trainer.extend(evaluator, name='val')
    print_report = [
        "epoch",
        "main/loss",
        "val/main/miou",
        "val/main/pixel_accuracy",
        "val/main/mean_class_accuracy",
        "lr",
        "elapsed_time",
    ]
    trainer.extend(extensions.PrintReport(print_report))

    # save results of training
    trainer.extend(extensions.PlotReport(
        ['main/loss'], x_key=epoch_or_iter, file_name='loss.png'))
    trainer.extend(extensions.PlotReport(
        ['val/main/miou',
         'val/main/pixel_accuracy',
         'val/main/mean_class_accuracy'],
        x_key=epoch_or_iter, file_name='accuracy.png'))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(
        extensions.snapshot(filename="snapshot_epoch-" + '{.updater.epoch}'),
        trigger=(save_trainer_interval, epoch_or_iter))
def test_chainer_pruning_extension_trigger():
    # type: () -> None
    """ChainerPruningExtension accepts tuple/Interval/ManualSchedule triggers only."""
    study = optuna.create_study()
    trial = create_running_trial(study, 1.0)

    # A plain (interval, unit) tuple is normalized into an IntervalTrigger.
    extension = ChainerPruningExtension(trial, 'main/loss', (1, 'epoch'))
    assert isinstance(extension.pruner_trigger, triggers.IntervalTrigger)

    # Explicit trigger instances are kept as-is.
    extension = ChainerPruningExtension(trial, 'main/loss',
                                        triggers.IntervalTrigger(1, 'epoch'))
    assert isinstance(extension.pruner_trigger, triggers.IntervalTrigger)
    extension = ChainerPruningExtension(trial, 'main/loss',
                                        triggers.ManualScheduleTrigger(1, 'epoch'))
    assert isinstance(extension.pruner_trigger, triggers.ManualScheduleTrigger)

    # Unsupported trigger types are rejected.
    with pytest.raises(TypeError):
        ChainerPruningExtension(trial, 'main/loss', triggers.TimeTrigger(1.))
def test_chainer_pruning_extension_trigger() -> None:
    """ChainerPruningExtension accepts tuple/Interval/ManualSchedule triggers only."""
    study = optuna.create_study()
    trial = study.ask()

    # A plain (interval, unit) tuple is normalized into an IntervalTrigger.
    extension = ChainerPruningExtension(trial, "main/loss", (1, "epoch"))
    assert isinstance(extension._pruner_trigger, triggers.IntervalTrigger)

    # Explicit trigger instances are kept as-is.
    extension = ChainerPruningExtension(
        trial, "main/loss", triggers.IntervalTrigger(1, "epoch")  # type: ignore
    )
    assert isinstance(extension._pruner_trigger, triggers.IntervalTrigger)
    extension = ChainerPruningExtension(
        trial, "main/loss", triggers.ManualScheduleTrigger(1, "epoch")  # type: ignore
    )
    assert isinstance(extension._pruner_trigger, triggers.ManualScheduleTrigger)

    # Unsupported trigger types are rejected.
    with pytest.raises(TypeError):
        ChainerPruningExtension(trial, "main/loss", triggers.TimeTrigger(1.0))  # type: ignore
def main():
    """Train on noisy CIFAR-10 using labels refined in a previous step."""
    parser = argparse.ArgumentParser(description='noisy CIFAR-10 training:')
    parser.add_argument('--batchsize', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', type=float, default=0.2,
                        help='Learning rate for SGD')
    parser.add_argument('--weight', type=float, default=1e-4,
                        help='Weight decay parameter')
    parser.add_argument('--epoch', type=int, default=120,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', default='result',
                        help='Directory to output the result')
    parser.add_argument('--mean', default='mean.npy', help='Mean image file')
    parser.add_argument('--label', default='result',
                        help='Directory where the labels obtained in the first step exist')
    parser.add_argument('--seed', type=int, default=0, help='Random Seed')
    args = parser.parse_args()

    np.random.seed(args.seed)

    train_val_d, test_d = get_cifar10()
    # Hold out 10% of the official training split for validation.
    train_d, val_d = train_val_split(train_val_d, int(len(train_val_d) * 0.9))

    # Reuse a cached mean image when available; otherwise compute and cache it.
    if os.path.exists(args.mean):
        mean = np.load(args.mean)
    else:
        mean = np.mean([x for x, _ in train_d], axis=0)
        np.save(args.mean, mean)

    model = TrainChain()
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Labels produced by the first training stage.
    updated_labels = np.load('{}/labels.npy'.format(args.label))
    updated_soft_labels = np.load('{}/soft_labels.npy'.format(args.label))

    train = TrainData(train_d, mean, updated_labels, updated_soft_labels)
    val = ValData(val_d, mean)
    test = ValData(test_d, mean)

    optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val_iter = chainer.iterators.SerialIterator(
        val, args.batchsize, repeat=False, shuffle=False)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Drop the learning rate 10x at 1/3 and 2/3 of the training schedule.
    trigger_epochs = [int(args.epoch / 3), int(args.epoch * 2 / 3)]
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.learnrate),
                   trigger=triggers.ManualScheduleTrigger(trigger_epochs, 'epoch'))
    trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpu))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
        trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
         'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())

    trainer.run()

    # Final held-out evaluation after training completes.
    test_evaluator = extensions.Evaluator(test_iter, model, device=args.gpu)
    results = test_evaluator()
    print('Test accuracy:', results['main/accuracy'])
def main():
    """Fine-tune the PSMNet `basic` model on KITTI stereo training data.

    Loads a pretrained snapshot (``--model``), trains with Adam, decays
    ``alpha`` 10x at epoch 200, and snapshots every epoch.
    """
    parser = argparse.ArgumentParser(description='Chainer-PSMNet')
    parser.add_argument('--maxdisp', type=int, default=192,
                        help='maxium disparity')
    parser.add_argument('--datapath',
                        default='/home/<username>/datasets/KITTI_stereo/training/',
                        help='datapath')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of epochs to train')
    parser.add_argument('--batchsize', type=int, default=12)
    parser.add_argument('--gpu0', '-g', type=int, default=-1,
                        help='First GPU ID (negative value indicates CPU)')
    parser.add_argument('--gpu1', '-G', type=int, default=-1,
                        help='Second GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--out', default='result_fine/basic')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--model', default='/<modelpath>/model_iter_xxxxx.npz',
                        help='datapath')
    args = parser.parse_args()

    print('# GPU: {} ({})'.format(args.gpu0, args.gpu1))
    print('# datapath: {}'.format(args.datapath))
    print('# epoch: {}'.format(args.epochs))
    print('# plot: {}'.format(extensions.PlotReport.available()))
    print('')

    # Triggers.  (Removed two trigger variables that were defined but never
    # used: validation was disabled and the LR schedule is hard-coded below.)
    log_trigger = (3, 'iteration')
    snapshot_trigger = (1, 'epoch')
    end_trigger = (300, 'epoch')

    # Dataset: file lists -> transformed image-folder datasets.
    dataname_list = lt.dataloader(args.datapath)
    (all_left_img, all_right_img, all_left_disp,
     test_left_img, test_right_img, test_left_disp) = dataname_list
    train = DA.myImageFolder(all_left_img, all_right_img, all_left_disp, True)
    test = DA.myImageFolder(
        test_left_img, test_right_img, test_left_disp, False)

    # Iterator
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)
    test_iter = chainer.iterators.MultiprocessIterator(
        test, args.batchsize, shuffle=False, repeat=False)

    # Model.  NOTE(review): the model is not moved with to_gpu() here;
    # presumably `basic` places its parameters on gpu0/gpu1 itself — confirm.
    model = basic(args.maxdisp, args.gpu0, args.gpu1,
                  training=True, train_type="kitti")
    if args.gpu0 >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu0).use()

    # Load the pretrained weights to fine-tune from.
    serializers.load_npz(args.model, model)

    # Optimizer
    optimizer = chainer.optimizers.Adam(alpha=0.001, beta1=0.9, beta2=0.999)
    optimizer.setup(model)

    # Updater
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu0)

    # Trainer
    trainer = training.Trainer(updater, end_trigger, args.out)
    trainer.extend(extensions.LogReport(trigger=log_trigger))
    trainer.extend(extensions.observe_lr(), trigger=log_trigger)
    trainer.extend(extensions.dump_graph('main/loss'))
    # Decay Adam's alpha 10x once, at epoch 200.
    trainer.extend(
        extensions.ExponentialShift('alpha', 0.1),
        trigger=triggers.ManualScheduleTrigger([200], 'epoch'))

    # plot loss
    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(
            ['main/loss'], x_key='iteration', file_name='loss.png'))

    # print progression
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'elapsed_time', 'lr',
         'main/loss', 'validation/main/loss']),
        trigger=log_trigger)
    trainer.extend(extensions.ProgressBar(update_interval=3))

    # save model parameters
    trainer.extend(extensions.snapshot(), trigger=snapshot_trigger)
    trainer.extend(
        extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}.npz'),
        trigger=snapshot_trigger)

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Multi-process (ChainerMN) SSD training on VOC07+12 trainval."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'), default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*', default=[80000, 100000])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    device = comm.intra_rank

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder, model.insize, model.mean))

    # Rank 0 owns the full index list; scatter shuffles and splits it.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # http://chainermn.readthedocs.io/en/latest/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize // comm.size, n_processes=2)

    if comm.rank == 0:
        test = VOCBboxDataset(year='2007', split='test',
                              use_difficult=True, return_difficult=True)
        test_iter = chainer.iterators.SerialIterator(
            test, args.test_batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), args.out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-3),
        trigger=triggers.ManualScheduleTrigger(args.step, 'iteration'))

    if comm.rank == 0:
        # Evaluate at every LR-drop point and at the end of training.
        trainer.extend(
            DetectionVOCEvaluator(test_iter, model, use_07_metric=True,
                                  label_names=voc_bbox_label_names),
            trigger=triggers.ManualScheduleTrigger(
                args.step + [args.iteration], 'iteration'))

        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(
            ['epoch', 'iteration', 'lr',
             'main/loss', 'main/loss/loc', 'main/loss/conf',
             'validation/main/map']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(
            extensions.snapshot(),
            trigger=triggers.ManualScheduleTrigger(
                args.step + [args.iteration], 'iteration'))
        trainer.extend(
            extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
            trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Multi-process (ChainerMN) SSD training on EPIC-Kitchens bounding boxes."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'), default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    device = comm.intra_rank

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(epic_kitchens_bbox_category_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(epic_kitchens_bbox_category_names),
                       pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train = EpicKitchensBboxDataset(year='2018', split='train')
    # Rank 0 owns the full index list; scatter shuffles and splits it.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    train = TransformDataset(train, ('img', 'mb_loc', 'mb_label'),
                             Transform(model.coder, model.insize, model.mean))
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # http://chainermn.readthedocs.io/en/latest/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=2)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (18, 'epoch'), args.out)
    # Drop LR 10x at epochs 12 and 15 of the 18-epoch schedule.
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr),
                   trigger=triggers.ManualScheduleTrigger([12, 15], 'epoch'))

    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(
            ['epoch', 'iteration', 'lr',
             'main/loss', 'main/loss/loc', 'main/loss/conf']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=1))

        trainer.extend(
            extensions.snapshot_object(model, 'model_iter_{.updater.iteration}.npz'),
            trigger=(1, 'epoch'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
else: updater = MultiprocessParallelUpdater(train_iters, optimizer, devices=devices) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=output_dir) if args.cosine: trainer.extend( CosineAnnealing('lr', int(args.epoch), len(train) / args.batchsize, eta_min=args.eta_min, init=args.lr)) else: trainer.extend( extensions.ExponentialShift('lr', 0.1, init=args.lr), trigger=triggers.ManualScheduleTrigger( [int(args.epoch * 0.50), int(args.epoch * 0.75)], 'epoch')) test_interval = 1, 'epoch' snapshot_interval = 10, 'epoch' log_interval = 100, 'iteration' trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpus[0]), trigger=test_interval) trainer.extend(extensions.dump_graph('main/loss')) trainer.extend(extensions.snapshot( filename='snapshot_epoch_{.updater.epoch}'), trigger=snapshot_interval) trainer.extend(extensions.snapshot_object( model, 'model_epoch_{.updater.epoch}'), trigger=snapshot_interval) trainer.extend(extensions.LogReport(trigger=log_interval)) trainer.extend(extensions.observe_lr(), trigger=log_interval)
optimizer, devices=devices) trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=output_dir) mst_epochs = [30, 60, 90] if args.cosine: trainer.extend( CosineAnnealing('lr', int(args.epoch), len(train) / args.batchsize, eta_min=args.eta_min, init=args.lr)) else: trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr), trigger=triggers.ManualScheduleTrigger( mst_epochs, 'epoch')) test_interval = 1, 'epoch' snapshot_interval = 10, 'epoch' log_interval = 10, 'iteration' trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpus[0]), trigger=test_interval) trainer.extend(extensions.dump_graph('main/loss')) trainer.extend( extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'), trigger=snapshot_interval) trainer.extend(extensions.snapshot_object(model, 'model_epoch_{.updater.epoch}'), trigger=snapshot_interval) trainer.extend(extensions.LogReport(trigger=log_interval))
def main():
    """Multi-process (ChainerMN) Faster R-CNN training on EPIC-Kitchens bboxes."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', type=int, default=1)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    device = comm.intra_rank

    faster_rcnn = FasterRCNNVGG16(
        n_fg_class=len(epic_kitchens_bbox_label_names),
        pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train = EpicKitchensBboxDataset(year='2018', split='train')
    # Rank 0 owns the full index list; scatter shuffles and splits it.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    train = TransformDataset(train, ('img', 'bbox', 'label', 'scale'),
                             Transform(faster_rcnn))
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    train_iter = chainer.iterators.SerialIterator(
        train, batch_size=args.batchsize)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(rate=0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (18, 'epoch'), args.out)
    # Drop LR 10x at epochs 12 and 15 of the 18-epoch schedule.
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr),
                   trigger=triggers.ManualScheduleTrigger([12, 15], 'epoch'))

    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(
            ['iteration', 'epoch', 'elapsed_time', 'lr',
             'main/loss', 'main/roi_loc_loss', 'main/roi_cls_loss',
             'main/rpn_loc_loss', 'main/rpn_cls_loss']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=1))

        # Snapshot the bare detector (not the train chain) every epoch.
        trainer.extend(
            extensions.snapshot_object(
                model.faster_rcnn, 'model_iter_{.updater.iteration}.npz'),
            trigger=(1, 'epoch'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def get_trainer(args):
    """Build a ChainerMN trainer from a YAML config file.

    Reads ``args.config``, constructs model/optimizer/datasets/updater from
    the config via project helpers, attaches the configured trainer
    extensions, and optionally resumes from ``args.resume``.

    Returns:
        The fully configured ``chainer.training.Trainer``.
    """
    # Use a context manager so the config file handle is closed promptly
    # (the original `yaml.load(open(...))` leaked the handle).
    # NOTE(review): yaml.load without an explicit Loader is unsafe on
    # untrusted input (and an error on PyYAML>=6); prefer yaml.safe_load
    # if the configs never use Python-object tags — confirm before changing.
    with open(args.config) as f:
        config = yaml.load(f)

    # Set workspace size
    if 'max_workspace_size' in config:
        chainer.cuda.set_max_workspace_size(config['max_workspace_size'])

    # Prepare ChainerMN communicator
    if args.gpu:
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
        device = comm.intra_rank
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1

    # Show the setup information (rank 0 only, to avoid duplicated output)
    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))
        if args.gpu:
            print('Using GPUs - max workspace size:',
                  chainer.cuda.get_max_workspace_size())
        print('Using {} communicator'.format(args.communicator))

    # Output version info
    if comm.rank == 0:
        print('Chainer version: {}'.format(chainer.__version__))
        print('ChainerMN version: {}'.format(chainermn.__version__))
        print('cuda: {}, cudnn: {}'.format(chainer.cuda.available,
                                           chainer.cuda.cudnn_enabled))

    # Create result_dir
    if args.result_dir is not None:
        config['result_dir'] = args.result_dir
        # Resume-style runs import the model module from the result dir.
        model_fn = config['model']['module'].split('.')[-1]
        sys.path.insert(0, args.result_dir)
        config['model']['module'] = model_fn
    else:
        config['result_dir'] = create_result_dir_from_config_path(args.config)
    log_fn = save_config_get_log_fn(config['result_dir'], args.config)
    if comm.rank == 0:
        print('result_dir:', config['result_dir'])

    # Instantiate model
    model = get_model_from_config(config, comm)
    if args.gpu:
        chainer.cuda.get_device(device).use()
        model.to_gpu()
    if comm.rank == 0:
        print('model:', model.__class__.__name__)

    # Initialize optimizer
    optimizer = get_optimizer_from_config(model, config)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    if comm.rank == 0:
        print('optimizer:', optimizer.__class__.__name__)

    # Setting up datasets: only rank 0 loads; scatter distributes.
    if comm.rank == 0:
        train_dataset, valid_dataset = get_dataset_from_config(config)
        print('train_dataset: {}'.format(len(train_dataset)),
              train_dataset.__class__.__name__)
        print('valid_dataset: {}'.format(len(valid_dataset)),
              valid_dataset.__class__.__name__)
    else:
        train_dataset, valid_dataset = [], []
    train_dataset = chainermn.scatter_dataset(train_dataset, comm)
    valid_dataset = chainermn.scatter_dataset(valid_dataset, comm)

    # Create iterators
    train_iter, valid_iter = create_iterators(train_dataset, valid_dataset,
                                              config)
    if comm.rank == 0:
        print('train_iter:', train_iter.__class__.__name__)
        print('valid_iter:', valid_iter.__class__.__name__)

    # Create updater and trainer
    if 'updater_creator' in config:
        updater_creator = get_updater_creator_from_config(config)
        updater = updater_creator(train_iter, optimizer, device=device)
    else:
        updater = create_updater(train_iter, optimizer, device=device)
    if comm.rank == 0:
        print('updater:', updater.__class__.__name__)

    # Create Trainer
    trainer = training.Trainer(
        updater, config['stop_trigger'], out=config['result_dir'])
    if comm.rank == 0:
        print('Trainer stops:', config['stop_trigger'])

    # Trainer extensions; reporting extensions only run on rank 0.
    for ext in config['trainer_extension']:
        ext, values = ext.popitem()
        if ext == 'LogReport' and comm.rank == 0:
            trigger = values['trigger']
            trainer.extend(
                extensions.LogReport(trigger=trigger, log_name=log_fn))
        elif ext == 'observe_lr' and comm.rank == 0:
            trainer.extend(extensions.observe_lr(), trigger=values['trigger'])
        elif ext == 'dump_graph' and comm.rank == 0:
            trainer.extend(extensions.dump_graph(**values))
        elif ext == 'Evaluator':
            # Evaluator runs on all ranks wrapped as a multi-node evaluator.
            assert 'module' in values
            mod = import_module(values['module'])
            evaluator = getattr(mod, values['name'])
            if evaluator is extensions.Evaluator:
                evaluator = evaluator(valid_iter, model, device=device)
            else:
                evaluator = evaluator(valid_iter, model.predictor)
            evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
            trainer.extend(evaluator, trigger=values['trigger'],
                           name=values['prefix'])
        elif ext == 'PlotReport' and comm.rank == 0:
            trainer.extend(extensions.PlotReport(**values))
        elif ext == 'PrintReport' and comm.rank == 0:
            trigger = values.pop('trigger')
            trainer.extend(extensions.PrintReport(**values), trigger=trigger)
        elif ext == 'ProgressBar' and comm.rank == 0:
            upd_int = values['update_interval']
            trigger = values['trigger']
            trainer.extend(extensions.ProgressBar(update_interval=upd_int),
                           trigger=trigger)
        elif ext == 'snapshot' and comm.rank == 0:
            filename = values['filename']
            trigger = values['trigger']
            trainer.extend(extensions.snapshot(filename=filename),
                           trigger=trigger)

    # LR decay: multiply lr by a fixed ratio at the configured points.
    if 'lr_drop_ratio' in config['optimizer'] \
            and 'lr_drop_triggers' in config['optimizer']:
        ratio = config['optimizer']['lr_drop_ratio']
        points = config['optimizer']['lr_drop_triggers']['points']
        unit = config['optimizer']['lr_drop_triggers']['unit']
        drop_trigger = triggers.ManualScheduleTrigger(points, unit)

        def lr_drop(trainer):
            trainer.updater.get_optimizer('main').lr *= ratio

        trainer.extend(lr_drop, trigger=drop_trigger)

    # Polynomial LR decay, applied every iteration.
    if 'lr_drop_poly_power' in config['optimizer']:
        power = config['optimizer']['lr_drop_poly_power']
        stop_trigger = config['stop_trigger']
        batchsize = train_iter.batch_size
        len_dataset = len(train_dataset)
        trainer.extend(
            PolynomialShift('lr', power, stop_trigger, batchsize, len_dataset),
            trigger=(1, 'iteration'))

    # Resume
    if args.resume is not None:
        serializers.load_npz(args.resume, trainer)
        if comm.rank == 0:
            print('Resumed from:', args.resume)

    if comm.rank == 0:
        print('==========================================')
    return trainer
def main():
    """Train a CenterNet detector (UNet or Res18-UNet backbone)."""
    args = parse_args()
    dump_args(args)

    # prepare dataset
    train, val, val_raw = prepare_dataset(full_data=args.full_data)
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, shared_mem=4000000)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.batchsize, repeat=False, shuffle=False, shared_mem=4000000)
    eval_iter = chainer.iterators.MultiprocessIterator(
        val_raw, 4, repeat=False, shuffle=False, shared_mem=4000000)

    # setup model
    if args.model == 'unet':
        model = UnetCenterNet()
    elif args.model == 'res18unet':
        model = Res18UnetCenterNet()
    training_model = TrainingModel(model)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        training_model.to_gpu()

    # setup optimizer
    optimizer = chainer.optimizers.NesterovAG(lr=1e-3)
    optimizer.setup(training_model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(1e-5))
    optimizer.add_hook(chainer.optimizer.GradientClipping(100.))

    # setup trainer
    updater = training.StandardUpdater(
        train_iter, optimizer, device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # set trainer extensions; validation only runs when data is held out.
    if not args.full_data:
        trainer.extend(extensions.Evaluator(
            val_iter, training_model, device=args.gpu, converter=converter))
        trainer.extend(DetectionMapEvaluator(eval_iter, model))
    trainer.extend(
        extensions.snapshot_object(model, 'model_{.updater.epoch}.npz'),
        trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.LogReport())
    if args.full_data:
        trainer.extend(extensions.PrintReport(['epoch', 'main/loss']))
    else:
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss', 'eval/main/map']))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # learning rate scheduling: drop 10x at 50% and 75% of training.
    lr_drop_epochs = [int(args.epoch * 0.5), int(args.epoch * 0.75)]
    lr_drop_trigger = triggers.ManualScheduleTrigger(lr_drop_epochs, 'epoch')
    trainer.extend(LearningRateDrop(0.1), trigger=lr_drop_trigger)
    trainer.extend(extensions.observe_lr())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # start training
    trainer.run()
def train(args):
    """Train a model on multiple GPUs according to a YAML config file.

    Builds model/optimizer/datasets/updater from the config via project
    helpers, attaches the configured trainer extensions, optionally resumes
    from ``args.resume`` (backing up the snapshot first), and runs training.

    Returns:
        int: 0 on completion.
    """
    # Use a context manager so the config file handle is closed promptly
    # (the original `yaml.load(open(...))` leaked the handle).
    # NOTE(review): yaml.load without an explicit Loader is unsafe on
    # untrusted input (and an error on PyYAML>=6); prefer yaml.safe_load
    # if the configs never use Python-object tags — confirm before changing.
    with open(args.config) as f:
        config = yaml.load(f)
    print('==========================================')

    # Set workspace size
    if 'max_workspace_size' in config:
        chainer.cuda.set_max_workspace_size(config['max_workspace_size'])

    # Output version info
    print('chainer version: {}'.format(chainer.__version__))
    print('cuda: {}, cudnn: {}, nccl: {}'.format(chainer.cuda.available,
                                                 chainer.cuda.cudnn_enabled,
                                                 HAVE_NCCL))

    # Create result_dir
    if args.result_dir is not None:
        config['result_dir'] = args.result_dir
    else:
        config['result_dir'] = create_result_dir_from_config_path(args.config)
    log_fn = save_config_get_log_fn(config['result_dir'], args.config)
    print('result_dir:', config['result_dir'])

    # Instantiate model
    model = get_model_from_config(config)
    print('model:', model.__class__.__name__)

    # Initialize optimizer
    optimizer = get_optimizer_from_config(model, config)
    print('optimizer:', optimizer.__class__.__name__)

    # Setting up datasets
    train_dataset, valid_dataset = get_dataset_from_config(config)
    print('train_dataset: {}'.format(len(train_dataset)),
          train_dataset.__class__.__name__)
    print('valid_dataset: {}'.format(len(valid_dataset)),
          valid_dataset.__class__.__name__)

    # Prepare devices: first GPU is 'main', the rest are 'gpu<N>'.
    devices = {'main': args.gpus[0]}
    for gid in args.gpus[1:]:
        devices['gpu{}'.format(gid)] = gid

    # Create iterators
    train_iter, valid_iter = create_iterators(
        train_dataset, config['dataset']['train']['batchsize'],
        valid_dataset, config['dataset']['valid']['batchsize'], devices)
    print('train_iter:', train_iter.__class__.__name__)
    print('valid_iter:', valid_iter.__class__.__name__)

    # Create updater
    updater_creator = get_updater_creator_from_config(config)
    updater = updater_creator(train_iter, optimizer, devices)
    print('updater:', updater.__class__.__name__)

    # Create trainer
    trainer = training.Trainer(updater, config['stop_trigger'],
                               out=config['result_dir'])
    print('Trainer stops:', config['stop_trigger'])

    # Trainer extensions
    for ext in config['trainer_extension']:
        ext, values = ext.popitem()
        if ext == 'LogReport':
            trigger = values['trigger']
            trainer.extend(
                extensions.LogReport(trigger=trigger, log_name=log_fn))
        elif ext == 'observe_lr':
            trainer.extend(extensions.observe_lr(), trigger=values['trigger'])
        elif ext == 'dump_graph':
            trainer.extend(extensions.dump_graph(**values))
        elif ext == 'Evaluator':
            evaluator_creator = get_evaluator_creator_from_config(values)
            evaluator = evaluator_creator(valid_iter, model, devices)
            trainer.extend(evaluator, trigger=values['trigger'],
                           name=values['prefix'])
        elif ext == 'PlotReport':
            trainer.extend(extensions.PlotReport(**values))
        elif ext == 'PrintReport':
            trigger = values.pop('trigger')
            trainer.extend(extensions.PrintReport(**values), trigger=trigger)
        elif ext == 'ProgressBar':
            upd_int = values['update_interval']
            trigger = values['trigger']
            trainer.extend(extensions.ProgressBar(update_interval=upd_int),
                           trigger=trigger)
        elif ext == 'snapshot':
            filename = values['filename']
            trigger = values['trigger']
            trainer.extend(extensions.snapshot(filename=filename),
                           trigger=trigger)
        elif ext == 'ParameterStatistics':
            # Resolve dotted link paths relative to model.predictor.
            links = []
            for link_name in values.pop('links'):
                lns = [ln.strip() for ln in link_name.split('.') if ln.strip()]
                target = model.predictor
                for ln in lns:
                    target = getattr(target, ln)
                links.append(target)
            trainer.extend(extensions.ParameterStatistics(links, **values))
        elif ext == 'custom':
            custom_extension = get_custum_extension_from_config(values)
            trainer.extend(custom_extension, trigger=values['trigger'])

    # LR decay: multiply lr by a fixed ratio at the configured points.
    if 'lr_drop_ratio' in config['optimizer'] \
            and 'lr_drop_triggers' in config['optimizer']:
        ratio = config['optimizer']['lr_drop_ratio']
        points = config['optimizer']['lr_drop_triggers']['points']
        unit = config['optimizer']['lr_drop_triggers']['unit']
        drop_trigger = triggers.ManualScheduleTrigger(points, unit)

        def lr_drop(trainer):
            trainer.updater.get_optimizer('main').lr *= ratio

        trainer.extend(lr_drop, trigger=drop_trigger)

    # Resume
    if args.resume is not None:
        # Back up the snapshot before it gets overwritten by new snapshots.
        fn = '{}.bak'.format(args.resume)
        shutil.copy(args.resume, fn)
        serializers.load_npz(args.resume, trainer)
        print('Resumed from:', args.resume)

    print('==========================================')
    trainer.run()
    return 0
def main():
    """Train an SSD detector on VOC 07+12 trainval with adaptive loss scaling.

    Builds an SSD300/SSD512 model, wraps it in ``AdaLossScaled`` for adaptive
    loss scaling (optionally with sanity checking and profiling), trains with
    MomentumSGD for ``--iteration`` iterations, and dumps the recorded loss
    scales / profiles / gradient statistics as CSV files under ``--out``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--dtype',
                        type=str,
                        choices=dtypes.keys(),
                        default='float32',
                        help='Select the data type of the model')
    parser.add_argument('--model-dir',
                        default=None,
                        type=str,
                        help='Where to store models')
    parser.add_argument('--dataset-dir',
                        default=None,
                        type=str,
                        help='Where to store datasets')
    parser.add_argument('--dynamic-interval',
                        default=None,
                        type=int,
                        help='Interval for dynamic loss scaling')
    parser.add_argument('--init-scale',
                        default=1,
                        type=float,
                        help='Initial scale for ada loss')
    parser.add_argument('--loss-scale-method',
                        default='approx_range',
                        type=str,
                        help='Method for adaptive loss scaling')
    parser.add_argument('--scale-upper-bound',
                        default=32800,
                        type=float,
                        help='Hard upper bound for each scale factor')
    parser.add_argument('--accum-upper-bound',
                        default=32800,
                        type=float,
                        help='Accumulated upper bound for all scale factors')
    parser.add_argument('--update-per-n-iteration',
                        default=100,
                        type=int,
                        help='Update the loss scale value per n iteration')
    parser.add_argument('--snapshot-per-n-iteration',
                        default=10000,
                        type=int,
                        help='The frequency of taking snapshots')
    parser.add_argument('--n-uf', default=1e-3, type=float)
    parser.add_argument('--nosanity-check', default=False, action='store_true')
    parser.add_argument('--nouse-fp32-update',
                        default=False,
                        action='store_true')
    parser.add_argument('--profiling', default=False, action='store_true')
    parser.add_argument('--verbose',
                        action='store_true',
                        default=False,
                        help='Verbose output')
    args = parser.parse_args()

    # Setting data types.  For non-FP32 runs cuDNN is disabled and the global
    # dtype is switched (``dtypes`` maps the CLI name to a chainer dtype).
    if args.dtype != 'float32':
        chainer.global_config.use_cudnn = 'never'
        chainer.global_config.dtype = dtypes[args.dtype]
        print('==> Setting the data type to {}'.format(args.dtype))

    # Initialize model
    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    model.use_preset('evaluate')

    # Apply adaptive loss scaling; recorder/profiler/sanity_checker are kept
    # so their contents can be exported to CSV after training.
    recorder = AdaLossRecorder(sample_per_n_iter=100)
    profiler = Profiler()
    sanity_checker = SanityChecker(
        check_per_n_iter=100) if not args.nosanity_check else None
    # Update the model to support AdaLoss
    # TODO: refactorize
    model_ = AdaLossScaled(
        model,
        init_scale=args.init_scale,
        cfg={
            'loss_scale_method': args.loss_scale_method,
            'scale_upper_bound': args.scale_upper_bound,
            'accum_upper_bound': args.accum_upper_bound,
            'update_per_n_iteration': args.update_per_n_iteration,
            'recorder': recorder,
            'profiler': profiler,
            'sanity_checker': sanity_checker,
            'n_uf_threshold': args.n_uf,
        },
        transforms=[
            AdaLossTransformLinear(),
            AdaLossTransformConvolution2D(),
        ],
        verbose=args.verbose)

    # Finalize the model
    train_chain = MultiboxTrainChain(model_)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        cp.random.seed(0)
        # NOTE: we have to transfer modules explicitly to GPU
        model.coder.to_gpu()
        model.extractor.to_gpu()
        model.multibox.to_gpu()

    # Prepare dataset
    if args.model_dir is not None:
        chainer.dataset.set_dataset_root(args.model_dir)
    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        Transform(model.coder, model.insize, model.mean,
                  dtype=dtypes[args.dtype]))
    # train_iter = chainer.iterators.MultiprocessIterator(
    #     train, args.batchsize)  # , n_processes=8, n_prefetch=2)
    train_iter = chainer.iterators.MultithreadIterator(train, args.batchsize)
    # train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test = VOCBboxDataset(year='2007',
                          split='test',
                          use_difficult=True,
                          return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.test_batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    if args.dtype == 'mixed16':
        if not args.nouse_fp32_update:
            print('==> Using FP32 update for dtype=mixed16')
            optimizer.use_fp32_update()  # by default use fp32 update
        # HACK: support skipping update by existing loss scaling functionality
        if args.dynamic_interval is not None:
            optimizer.loss_scaling(interval=args.dynamic_interval, scale=None)
        else:
            optimizer.loss_scaling(interval=float('inf'), scale=None)
            optimizer._loss_scale_max = 1.0  # to prevent actual loss scaling
    optimizer.setup(train_chain)

    # Detection-specific hooks: biases get doubled-LR gradient scaling,
    # everything else plain weight decay.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    # Step-wise LR decay at the iterations listed in --step.
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step, 'iteration'))
    trainer.extend(DetectionVOCEvaluator(test_iter,
                                         model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    # Log the optimizer's internal loss scale alongside lr.
    trainer.extend(extensions.observe_value(
        'loss_scale',
        lambda trainer: trainer.updater.get_optimizer('main')._loss_scale),
        trigger=log_interval)

    metrics = [
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]
    if args.dynamic_interval is not None:
        metrics.insert(2, 'loss_scale')
    trainer.extend(extensions.PrintReport(metrics), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    # Monitor gradients of matching layers while the trainer runs; the hook
    # is installed as a context manager around trainer.run().
    hook = AdaLossMonitor(sample_per_n_iter=100,
                          verbose=args.verbose,
                          includes=['Grad', 'Deconvolution'])
    recorder.trainer = trainer
    hook.trainer = trainer
    with ExitStack() as stack:
        stack.enter_context(hook)
        trainer.run()

    # Export collected statistics as CSV under the output directory.
    recorder.export().to_csv(os.path.join(args.out, 'loss_scale.csv'))
    profiler.export().to_csv(os.path.join(args.out, 'profile.csv'))
    if sanity_checker:
        sanity_checker.export().to_csv(
            os.path.join(args.out, 'sanity_check.csv'))
    hook.export_history().to_csv(os.path.join(args.out, 'grad_stats.csv'))
def main():
    """Train a multi-label classifier (ResNet-50 backbone) on VOC 2007.

    Bounding-box annotations are converted to multi-label targets via
    ``bbox_to_multi_label``; training runs for 11 epochs with step LR decay
    at epochs 8 and 10, and the model is snapshotted every epoch.
    """
    parser = argparse.ArgumentParser(
        description='Chainer Multi-label classification')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--batchsize', '-b', type=int, default=4,
                        help='Number of images in each mini-batch')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    args = parser.parse_args()

    model = get_resnet_50(len(voc_bbox_label_names))
    # Pick the 'fc6' output as the classification head's features.
    model.pick = 'fc6'
    train_chain = MultiLabelClassifier(model,
                                       loss_scale=len(voc_bbox_label_names))

    # Box labels are post-processed into multi-label targets.
    train = VOCBboxDataset(year='2007', split='trainval', use_difficult=False)
    train = TransformDataset(train, ('img', 'bbox'), bbox_to_multi_label)
    test = VOCBboxDataset(year='2007', split='test', use_difficult=False)
    test = TransformDataset(test, ('img', 'bbox'), bbox_to_multi_label)

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        train_chain.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(0.001)
    optimizer.setup(train_chain)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(1e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    stop_trigger = (11, 'epoch')
    log_interval = (20, 'iteration')

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.gpu,
                                                converter=converter)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)
    trainer.extend(
        extensions.Evaluator(test_iter,
                             train_chain,
                             device=args.gpu,
                             converter=converter))
    # LR drops by 10x at epochs 8 and 10.
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=triggers.ManualScheduleTrigger([8, 10], 'epoch'))
    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'lr', 'epoch', 'elapsed_time',
        'main/loss', 'main/recall', 'main/precision',
        'main/n_pred', 'main/n_pos',
        'validation/main/loss', 'validation/main/recall',
        'validation/main/precision', 'validation/main/n_pred',
        'validation/main/n_pos',
    ]), trigger=log_interval)
    # Default trigger (1, 'epoch'): model weights saved once per epoch.
    trainer.extend(extensions.snapshot_object(model, 'snapshot_model.npz'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.run()
def main():
    """Train a molecule-pair classifier with early stopping.

    Parses paired-SMILES CSV data, builds a predictor via
    ``set_up_predictor``, trains with Adam plus optional gradient clipping /
    L1 / L2 regularization, evaluates accuracy, ROC-AUC, PRC-AUC and F1 on
    both train and validation splits, and finally pickles the classifier.
    """
    # Parse the arguments.
    args = parse_arguments()
    # NOTE: these flags arrive as strings ('True'/'False'), hence the
    # string comparisons below.
    augment = False if args.augment == 'False' else True
    multi_gpu = False if args.multi_gpu == 'False' else True
    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label(label_list):
        # Labels are cast to int32 for the classification loss.
        label_arr = np.asarray(label_list, dtype=np.int32)
        return label_arr

    # Apply a preprocessor to the dataset.
    logging.info('Preprocess train dataset and test dataset...')
    preprocessor = preprocess_method_dict[args.method]()
    parser = CSVFileParserForPair(preprocessor,
                                  postprocess_label=postprocess_label,
                                  labels=labels,
                                  smiles_cols=['smiles_1', 'smiles_2'])
    train = parser.parse(args.train_datafile)['dataset']
    valid = parser.parse(args.valid_datafile)['dataset']

    if augment:
        logging.info('Utilizing data augmentation in train set')
        train = augment_dataset(train)

    num_train = train.get_datasets()[0].shape[0]
    num_valid = valid.get_datasets()[0].shape[0]
    logging.info('Train/test split: {}/{}'.format(num_train, num_valid))

    # Hidden dims are given as a comma-separated string, e.g. "64,32".
    if len(args.net_hidden_dims):
        net_hidden_dims = tuple([int(net_hidden_dim)
                                 for net_hidden_dim
                                 in args.net_hidden_dims.split(',')])
    else:
        net_hidden_dims = ()
    fp_attention = True if args.fp_attention else False
    update_attention = True if args.update_attention else False
    weight_tying = False if args.weight_tying == 'False' else True
    attention_tying = False if args.attention_tying == 'False' else True
    fp_batch_normalization = True if args.fp_bn == 'True' else False
    layer_aggregator = None if args.layer_aggregator == '' \
        else args.layer_aggregator
    context = False if args.context == 'False' else True
    output_activation = functions.relu \
        if args.output_activation == 'relu' else None

    # NOTE(review): the keyword is spelled ``weight_typing`` on the callee
    # side; it receives the ``weight_tying`` flag — confirm against
    # set_up_predictor's signature.
    predictor = set_up_predictor(
        method=args.method,
        fp_hidden_dim=args.fp_hidden_dim,
        fp_out_dim=args.fp_out_dim,
        conv_layers=args.conv_layers,
        concat_hidden=args.concat_hidden,
        layer_aggregator=layer_aggregator,
        fp_dropout_rate=args.fp_dropout_rate,
        fp_batch_normalization=fp_batch_normalization,
        net_hidden_dims=net_hidden_dims,
        class_num=class_num,
        sim_method=args.sim_method,
        fp_attention=fp_attention,
        weight_typing=weight_tying,
        attention_tying=attention_tying,
        update_attention=update_attention,
        context=context,
        context_layers=args.context_layers,
        context_dropout=args.context_dropout,
        message_function=args.message_function,
        readout_function=args.readout_function,
        num_timesteps=args.num_timesteps,
        num_output_hidden_layers=args.num_output_hidden_layers,
        output_hidden_dim=args.output_hidden_dim,
        output_activation=output_activation,
        symmetric=args.symmetric
    )

    train_iter = SerialIterator(train, args.batchsize)
    test_iter = SerialIterator(valid, args.batchsize,
                               repeat=False, shuffle=False)

    metrics_fun = {'accuracy': F.binary_accuracy}
    classifier = Classifier(predictor,
                            lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=metrics_fun,
                            device=args.gpu)

    # Set up the optimizer.
    optimizer = optimizers.Adam(alpha=args.learning_rate,
                                weight_decay_rate=args.weight_decay_rate)
    # optimizer = optimizers.Adam()
    # optimizer = optimizers.SGD(lr=args.learning_rate)
    optimizer.setup(classifier)
    # add regularization
    if args.max_norm > 0:
        optimizer.add_hook(
            chainer.optimizer.GradientClipping(threshold=args.max_norm))
    if args.l2_rate > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.l2_rate))
    if args.l1_rate > 0:
        optimizer.add_hook(chainer.optimizer.Lasso(rate=args.l1_rate))

    # Set up the updater.
    if multi_gpu:
        logging.info('Using multiple GPUs')
        updater = training.ParallelUpdater(train_iter,
                                           optimizer,
                                           devices={'main': 0, 'second': 1},
                                           converter=concat_mols)
    else:
        logging.info('Using single GPU')
        updater = training.StandardUpdater(train_iter,
                                           optimizer,
                                           device=args.gpu,
                                           converter=concat_mols)

    # Set up the trainer.
    logging.info('Training...')
    # add stop_trigger parameter
    # NOTE: ``patients`` is Chainer's (historically spelled) keyword for
    # the early-stopping patience.
    early_stop = triggers.EarlyStoppingTrigger(monitor='validation/main/loss',
                                               patients=30,
                                               max_trigger=(500, 'epoch'))
    out = 'output' + '/' + args.out
    trainer = training.Trainer(updater, stop_trigger=early_stop, out=out)
    # trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(E.Evaluator(test_iter, classifier,
                               device=args.gpu, converter=concat_mols))
    # A non-repeating iterator over the training set for evaluation only.
    train_eval_iter = SerialIterator(train, args.batchsize,
                                     repeat=False, shuffle=False)

    trainer.extend(AccuracyEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_acc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(AccuracyEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_acc',
        pos_labels=1, ignore_labels=-1))

    trainer.extend(ROCAUCEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_roc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(ROCAUCEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_roc',
        pos_labels=1, ignore_labels=-1))

    trainer.extend(PRCAUCEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_prc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(PRCAUCEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_prc',
        pos_labels=1, ignore_labels=-1))

    trainer.extend(F1Evaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_f',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # instead extension name `val` is used.
    trainer.extend(F1Evaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_f',
        pos_labels=1, ignore_labels=-1))

    # apply shift strategy to learning rate every 10 epochs
    # trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate), trigger=(10, 'epoch'))
    if args.exp_shift_strategy == 1:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger(
                           [10, 20, 30, 40, 50, 60], 'epoch'))
    elif args.exp_shift_strategy == 2:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger(
                           [5, 10, 15, 20, 25, 30], 'epoch'))
    elif args.exp_shift_strategy == 3:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger(
                           [5, 10, 15, 20, 25, 30, 40, 50, 60, 70], 'epoch'))
    else:
        raise ValueError('No such strategy to adapt learning rate')
    # # observation of learning rate
    trainer.extend(E.observe_lr(), trigger=(1, 'iteration'))

    entries = [
        'epoch',
        'main/loss', 'train_acc/main/accuracy', 'train_roc/main/roc_auc',
        'train_prc/main/prc_auc',
        # 'train_p/main/precision', 'train_r/main/recall',
        'train_f/main/f1',
        'validation/main/loss', 'val_acc/main/accuracy',
        'val_roc/main/roc_auc', 'val_prc/main/prc_auc',
        # 'val_p/main/precision', 'val_r/main/recall',
        'val_f/main/f1',
        'lr',
        'elapsed_time']
    trainer.extend(E.PrintReport(entries=entries))
    # change from 10 to 2 on Mar. 1 2019
    trainer.extend(E.snapshot(), trigger=(2, 'epoch'))
    trainer.extend(E.LogReport())
    trainer.extend(E.ProgressBar())
    trainer.extend(E.PlotReport(['main/loss', 'validation/main/loss'],
                                'epoch', file_name='loss.png'))
    trainer.extend(E.PlotReport(['train_acc/main/accuracy',
                                 'val_acc/main/accuracy'],
                                'epoch', file_name='accuracy.png'))

    if args.resume:
        resume_path = os.path.join(out, args.resume)
        logging.info(
            'Resume training according to snapshot in {}'.format(resume_path))
        chainer.serializers.load_npz(resume_path, trainer)

    trainer.run()

    # Save the regressor's parameters.
    model_path = os.path.join(out, args.model_filename)
    logging.info('Saving the trained models to {}...'.format(model_path))
    classifier.save_pickle(model_path, protocol=args.protocol)
def main():
    """Train a multi-task (detection + segmentation) SSD variant on VOC.

    Builds a Multi_task_300/512 model, trains for ``--iteration`` iterations
    with MomentumSGD, evaluates detection and/or segmentation at the
    scheduled ``--eval_step`` points, and snapshots trainer and model at each
    ``--snap_step`` multiple.  Output goes to
    ``<out>/<experiment>/<timestamp>``.

    Fix over the previous revision: ``use_07`` was only assigned in the
    ``--dataset voc`` branch, so running with ``--dataset coco --detection``
    raised an UnboundLocalError when it was passed to MultitaskEvaluator.
    It is now initialized to False before the branch (voc behavior is
    unchanged).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('multi_task_300', 'multi_task_512'),
                        default='multi_task_300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--eval_step', type=int, nargs='*',
                        default=[80000, 100000, 120000])
    parser.add_argument('--lr_step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--snap_step', type=int, default=10000)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    # in experiments for real experiment
    parser.add_argument('--resume', type=str)
    parser.add_argument('--detection', action='store_true', default=False)
    parser.add_argument('--segmentation', action='store_true', default=False)
    parser.add_argument('--attention', action='store_true', default=False)
    parser.add_argument('--dataset', default='voc', type=str)
    parser.add_argument('--experiment', type=str, default='final_voc')
    parser.add_argument('--multitask_loss', action='store_true',
                        default=False)
    parser.add_argument('--dynamic_loss', action='store_true', default=False)
    parser.add_argument('--log_interval', type=int, default=10)
    parser.add_argument('--debug', action='store_true', default=False)
    parser.add_argument('--update_split_interval', type=int, default=100)
    parser.add_argument(
        '--loss_split', type=float, default=0.5
    )  # in fact for detection, other task(segmentation) is 1-loss_split
    args = parser.parse_args()

    # Expand the scalar --snap_step into the full list of snapshot points:
    # snap_step, 2*snap_step, ... up to --iteration.
    snap_step = args.snap_step
    args.snap_step = list(range(snap_step, args.iteration + 1, snap_step))

    # redefine the output path: result/<experiment>/<timestamp>
    import os
    import time
    args.out = os.path.join(args.out, args.experiment,
                            time.strftime("%Y%m%d_%H%M%S", time.localtime()))

    if args.model == 'multi_task_300':
        model = Multi_task_300(n_fg_class=len(voc_bbox_label_names),
                               pretrained_model='imagenet',
                               detection=args.detection,
                               segmentation=args.segmentation,
                               attention=args.attention)
    elif args.model == 'multi_task_512':
        model = Multi_task_512(n_fg_class=len(voc_bbox_label_names),
                               pretrained_model='imagenet',
                               detection=args.detection,
                               segmentation=args.segmentation,
                               attention=args.attention)
    model.use_preset('evaluate')

    # At least one task must be enabled.
    if not (args.segmentation or args.detection):
        raise RuntimeError

    train_chain = MultiboxTrainChain(model,
                                     gpu=args.gpu >= 0,
                                     use_multi_task_loss=args.multitask_loss,
                                     loss_split=args.loss_split)
    train_chain.cleargrads()
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        Multi_task_VOC(voc_experiments[args.experiment][args.experiment +
                                                        '_train']),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(
        train, batch_size=args.batchsize)
    test = VOCBboxDataset(year='2007',
                          split='test',
                          use_difficult=True,
                          return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)
    test_mask = VOCSemanticSegmentationDataset(split='val')
    test_mask_iter = chainer.iterators.SerialIterator(test_mask,
                                                      args.batchsize,
                                                      repeat=False,
                                                      shuffle=False)

    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # optimizer.add_hook(GradientClipping(0.1))
    # Biases get doubled-LR gradient scaling; everything else weight decay.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    # NOTE: trainer-level resume is handled at the bottom of this function.
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr),
                   trigger=triggers.ManualScheduleTrigger(
                       args.lr_step, 'iteration'))

    # use_07 must exist for the detection evaluator regardless of dataset;
    # only VOC uses the 07 metric.
    use_07 = False
    if args.dataset == 'voc':
        use_07 = True
        label_names = voc_bbox_label_names
    elif args.dataset == 'coco':
        label_names = coco_bbox_label_names

    if args.detection and not args.debug:
        trainer.extend(MultitaskEvaluator(test_iter,
                                          model,
                                          args.dataset,
                                          use_07,
                                          label_names=label_names),
                       trigger=triggers.ManualScheduleTrigger(
                           args.eval_step + [args.iteration], 'iteration'))
    if args.segmentation and not args.debug:
        trainer.extend(MultitaskEvaluator(test_mask_iter,
                                          model,
                                          dataset=args.dataset,
                                          label_names=label_names,
                                          detection=False),
                       trigger=triggers.ManualScheduleTrigger(
                           args.eval_step + [args.iteration], 'iteration'))

    log_interval = args.log_interval, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    # Dynamic re-weighting between the two task losses.
    if args.segmentation and args.detection and args.dynamic_loss:
        trainer.extend(
            loss_split.LossSplit(trigger=(args.update_split_interval,
                                          'iteration')))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/mask',
        'main/loss/loc', 'main/loss/conf', 'main/loss/split'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(),
                   trigger=triggers.ManualScheduleTrigger(
                       args.snap_step + [args.iteration], 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=triggers.ManualScheduleTrigger(
            args.snap_step + [args.iteration], 'iteration'))

    if args.resume:
        # Snapshot files containing 'model' hold bare model weights;
        # anything else is a full trainer snapshot.
        if 'model' in args.resume:
            serializers.load_npz(args.resume, model)
        else:
            serializers.load_npz(args.resume, trainer)

    print(args)
    trainer.run()
def do():
    """Run a short SSD300 training loop on a custom dataset (``ssdd``).

    Small-scale driver: defaults are 64 iterations with LR drops at
    iterations 8 and 16.  GPU transfer and evaluation are currently
    commented out; only a progress bar, optional plots and periodic
    snapshots are attached.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', choices=('ssd300', 'ssd512'), default='ssd300')
    parser.add_argument('--batchsize', type=int, default=8)
    parser.add_argument('--iteration', type=int, default=64)
    parser.add_argument('--step', type=int, nargs='*', default=[8, 16])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # NOTE: the --model choice is currently ignored; SSD300 is always built.
    model = SSD300(
        n_fg_class=len(ssdd.labels),
        pretrained_model='imagenet'
    )
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    """
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    """
    train = TransformDataset(
        train_dataset,
        Transform(model.coder, model.insize, model.mean),
    )
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)
    test = test_dataset
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # Biases get doubled-LR gradient scaling; other params weight decay.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-3),
        trigger=triggers.ManualScheduleTrigger(args.step, 'iteration')
    )
    """
    trainer.extend(
        extensions.Evaluator(
            test_iter,
            model
        ),
        trigger=triggers.ManualScheduleTrigger(
            args.step + [args.iteration], 'iteration'
        )
    )
    """
    trainer.extend(extensions.ProgressBar(update_interval=1))
    # trainer.extend(extensions.LogReport(trigger=1))
    # trainer.extend(extensions.observe_lr(), trigger=1)
    # trainer.extend(extensions.PrintReport(
    #     ['epoch', 'iteration', 'lr',
    #      'main/loss', 'main/loss/loc', 'main/loss/conf',
    #      'validation/main/map', 'elapsed_time']),
    #     trigger=1)
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'main/loss/loc', 'main/loss/conf'],
                'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['validation/main/map'],
                'epoch', file_name='accuracy.png'))
    # NOTE(review): the filename says "iter" but formats ``.updater.epoch``
    # — snapshots taken within the same epoch overwrite each other; confirm
    # whether ``.updater.iteration`` was intended.
    trainer.extend(extensions.snapshot(
        filename='snapshot_iter_{.updater.epoch}.npz'),
        trigger=(4, 'iteration')
    )
    trainer.run()
# Training setup for a fixed 120k-iteration schedule (batchsize-independent):
# builds the dataset/iterator/optimizer/trainer and attaches standard
# reporting and snapshot extensions.  Relies on `args`, `model`,
# `TrainDataset`, `VOCDataset`, `TrainWrapper` and `CustomWeightDecay`
# defined elsewhere in this file's scope.
# Each --train entry is "year-split" (e.g. "2007-trainval"), split on '-'.
dataset = TrainDataset(
    [VOCDataset(args.root, *t.split('-')) for t in args.train], model)
iterator = chainer.iterators.MultiprocessIterator(
    dataset, args.batchsize, n_processes=2)
optimizer = chainer.optimizers.MomentumSGD()
optimizer.setup(TrainWrapper(model))
# Biases ('b') get doubled LR and no decay via the custom hook.
optimizer.add_hook(CustomWeightDecay(0.0005, b={'lr': 2, 'decay': 0}))
updater = training.StandardUpdater(iterator, optimizer, device=args.gpu)
trainer = training.Trainer(updater, (120000, 'iteration'), args.output)
# 10x LR drops at 80k and 100k iterations (standard SSD schedule).
trainer.extend(
    extensions.ExponentialShift('lr', 0.1, init=0.001),
    trigger=triggers.ManualScheduleTrigger([80000, 100000], 'iteration'))
snapshot_interval = 1000, 'iteration'
log_interval = 10, 'iteration'
trainer.extend(extensions.dump_graph('main/loss'))
trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
trainer.extend(extensions.snapshot_object(
    model, 'model_iter_{.updater.iteration}'), trigger=snapshot_interval)
trainer.extend(extensions.LogReport(trigger=log_interval))
trainer.extend(extensions.observe_lr(), trigger=log_interval)
trainer.extend(extensions.PrintReport(
    ['epoch', 'iteration', 'main/loss', 'main/loc', 'main/conf', 'lr']),
    trigger=log_interval)
trainer.extend(extensions.ProgressBar(update_interval=10))
# Training setup with the iteration budget scaled to an effective batch size
# of 32: the reference schedule (120k iterations, LR drops at 80k/100k) is
# rescaled by 32/batchsize so the number of seen samples stays constant.
# Relies on `dataset`, `args`, `model`, `TrainWrapper` and
# `CustomWeightDecay` defined elsewhere in this file's scope.
#
# Fix over the previous revision: the scaled counts used true division
# (`120000*32/args.batchsize`), which yields floats in Python 3.  Float
# points given to ManualScheduleTrigger may never equal an integer iteration
# count, so the LR drops could silently never fire; the stop trigger also
# expects an integer period.  Floor division keeps the counts integral
# (identical values whenever batchsize divides 32*120000).
iterator = chainer.iterators.MultiprocessIterator(
    dataset, args.batchsize, n_processes=2)
optimizer = chainer.optimizers.MomentumSGD()
optimizer.setup(TrainWrapper(model))
# Biases ('b') get doubled LR and no decay via the custom hook.
optimizer.add_hook(CustomWeightDecay(0.0005, b={'lr': 2, 'decay': 0}))
updater = training.StandardUpdater(iterator, optimizer, device=args.gpu)
trainer = training.Trainer(
    updater, (120000 * 32 // args.batchsize, 'iteration'), args.output)
trainer.extend(
    extensions.ExponentialShift('lr', 0.1, init=0.001),
    trigger=triggers.ManualScheduleTrigger(
        [80000 * 32 // args.batchsize, 100000 * 32 // args.batchsize],
        'iteration'))
snapshot_interval = 1000, 'iteration'
log_interval = 10, 'iteration'
trainer.extend(extensions.dump_graph('main/loss'))
trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
trainer.extend(extensions.snapshot_object(
    model, 'model_iter_{.updater.iteration}'), trigger=snapshot_interval)
trainer.extend(extensions.LogReport(trigger=log_interval))
trainer.extend(extensions.observe_lr(), trigger=log_interval)
trainer.extend(extensions.PrintReport(
    ['epoch', 'iteration', 'main/loss', 'main/loc', 'main/conf', 'lr']),
    trigger=log_interval)
trainer.extend(extensions.ProgressBar(update_interval=10))
self.saved_dir = saved_dir self.before_iter = before_iter self.save_after = save_after def __call__(self, trainer): curr_iter = trainer.updater.iteration + self.before_iter if curr_iter > self.save_after: chainer.serializers.save_npz( self.saved_dir + self.save_name[:-4] + '_' + str(curr_iter) + '.npz', model, ) steps = [200000, 400000] lr_trigger = triggers.ManualScheduleTrigger(steps, 'iteration') updater = training.updaters.StandardUpdater(train_iter, optimizer, device=gpu_id) trainer = training.Trainer(updater, (iters, 'iteration'), 'ssd_result') trainer.extend(extensions.ExponentialShift('lr', 0.1), trigger=lr_trigger) #trainer.extend(evaluator,trigger=(50000, 'iteration')) trainer.extend( training.extensions.LogReport(log_name='ssd_report' + SAVE_PATH, trigger=(1000, 'iteration'))) trainer.extend(extensions.observe_lr(), trigger=(1000, 'iteration')) trainer.extend( training.extensions.PrintReport( ['iteration', 'lr', 'main/loss', 'main/loss/loc', 'main/loss/conf'])) trainer.extend(save_model(model, SAVE_PATH, save_after=0),
def get_trainer(args):
    """Build a fully-configured chainer Trainer from a YAML config file.

    Reads the config named by ``args.config``, instantiates the model,
    optimizer, datasets, iterators and updater through the project's
    ``*_from_config`` helpers, registers every extension listed under
    ``trainer_extension``, wires up the optional LR-decay schedules, and
    resumes from ``args.resume`` if given.

    Returns the ready-to-run ``training.Trainer``.
    """
    # FIX: open the config inside a `with` block so the handle is closed
    # (the original leaked it via `yaml.load(open(...))`).
    # SECURITY NOTE: yaml.load with the full Loader can construct arbitrary
    # Python objects; only feed it trusted config files.  Kept (rather than
    # safe_load) because existing configs may rely on python-specific tags.
    with open(args.config) as f:
        config = yaml.load(f)

    # Set workspace size
    if 'max_workspace_size' in config:
        chainer.cuda.set_max_workspace_size(config['max_workspace_size'])

    # Show the setup information
    print('==========================================')
    print('Chainer version: {}'.format(chainer.__version__))
    print('CuPy version: {}'.format(chainer.cuda.cupy.__version__))
    # FIX: the original format string had three placeholders
    # ('cuda/cudnn/nccl') but only two arguments, which raises IndexError.
    print('cuda: {}, cudnn: {}'.format(
        chainer.cuda.available,
        chainer.cuda.cudnn_enabled,
    ))

    # Prepare devices: first GPU is 'main', the rest are named 'gpu<N>'.
    print('Devices:')
    devices = {'main': args.gpus[0]}
    print('\tmain:', args.gpus[0])
    for gid in args.gpus[1:]:
        devices['gpu{}'.format(gid)] = gid
        print('\tgpu{}'.format(gid), gid)

    # Create result_dir.  When resuming into an explicit result dir, the
    # model module saved there shadows the configured import path.
    if args.result_dir is not None:
        config['result_dir'] = args.result_dir
        model_fn = config['model']['module'].split('.')[-1]
        sys.path.insert(0, args.result_dir)
        config['model']['module'] = model_fn
    else:
        config['result_dir'] = create_result_dir_from_config_path(args.config)
    log_fn = save_config_get_log_fn(config['result_dir'], args.config)
    print('result_dir:', config['result_dir'])

    # Instantiate model
    model = get_model_from_config(config)
    print('model:', model.__class__.__name__)

    # Initialize optimizer
    optimizer = get_optimizer_from_config(model, config)
    print('optimizer:', optimizer.__class__.__name__)

    # Setting up datasets
    train_dataset, valid_dataset = get_dataset_from_config(config)
    print('train_dataset: {}'.format(len(train_dataset)),
          train_dataset.__class__.__name__)
    print('valid_dataset: {}'.format(len(valid_dataset)),
          valid_dataset.__class__.__name__)

    # Create iterators
    train_iter, valid_iter = create_iterators(train_dataset, valid_dataset,
                                              config)
    print('train_iter:', train_iter.__class__.__name__)
    print('valid_iter:', valid_iter.__class__.__name__)

    # Create updater and trainer; a custom updater factory may be configured.
    if 'updater_creator' in config:
        updater_creator = get_updater_creator_from_config(config)
        updater = updater_creator(train_iter, optimizer, devices)
    else:
        updater = create_updater(train_iter, optimizer, devices)
    print('updater:', updater.__class__.__name__)

    # Create Trainer
    trainer = training.Trainer(updater, config['stop_trigger'],
                               out=config['result_dir'])
    print('Trainer stops:', config['stop_trigger'])

    # Trainer extensions: each list entry is a one-key dict {name: values}.
    # NOTE: popitem() mutates the config entries in place.
    for ext in config['trainer_extension']:
        ext, values = ext.popitem()
        if ext == 'LogReport':
            trigger = values['trigger']
            trainer.extend(
                extensions.LogReport(trigger=trigger, log_name=log_fn))
        elif ext == 'observe_lr':
            trainer.extend(extensions.observe_lr(), trigger=values['trigger'])
        elif ext == 'dump_graph':
            trainer.extend(extensions.dump_graph(**values))
        elif ext == 'Evaluator':
            assert 'module' in values
            mod = import_module(values['module'])
            evaluator = getattr(mod, values['name'])
            # The stock Evaluator wraps the whole model; custom evaluators
            # only get the bare predictor.
            if evaluator is extensions.Evaluator:
                evaluator = evaluator(valid_iter, model, device=args.gpus[0])
            else:
                evaluator = evaluator(valid_iter, model.predictor)
            trainer.extend(evaluator,
                           trigger=values['trigger'],
                           name=values['prefix'])
        elif ext == 'PlotReport':
            trainer.extend(extensions.PlotReport(**values))
        elif ext == 'PrintReport':
            trigger = values.pop('trigger')
            trainer.extend(extensions.PrintReport(**values), trigger=trigger)
        elif ext == 'ProgressBar':
            upd_int = values['update_interval']
            trigger = values['trigger']
            trainer.extend(extensions.ProgressBar(update_interval=upd_int),
                           trigger=trigger)
        elif ext == 'snapshot':
            filename = values['filename']
            trigger = values['trigger']
            trainer.extend(extensions.snapshot(filename=filename),
                           trigger=trigger)
        elif ext == 'ParameterStatistics':
            # Resolve dotted link names (e.g. 'conv1.bn') against the
            # predictor and monitor those links.
            links = []
            for link_name in values.pop('links'):
                lns = [ln.strip() for ln in link_name.split('.') if ln.strip()]
                target = model.predictor
                for ln in lns:
                    target = getattr(target, ln)
                links.append(target)
            trainer.extend(extensions.ParameterStatistics(links, **values))
        elif ext == 'custom':
            custom_extension = get_custum_extension_from_config(values)
            trainer.extend(custom_extension)

    # LR decay: multiplicative drop at configured schedule points.
    if 'lr_drop_ratio' in config['optimizer'] \
            and 'lr_drop_triggers' in config['optimizer']:
        ratio = config['optimizer']['lr_drop_ratio']
        points = config['optimizer']['lr_drop_triggers']['points']
        unit = config['optimizer']['lr_drop_triggers']['unit']
        drop_trigger = triggers.ManualScheduleTrigger(points, unit)

        def lr_drop(trainer):
            trainer.updater.get_optimizer('main').lr *= ratio

        trainer.extend(lr_drop, trigger=drop_trigger)

    # Polynomial LR decay, evaluated every iteration.
    if 'lr_drop_poly_power' in config['optimizer']:
        power = config['optimizer']['lr_drop_poly_power']
        stop_trigger = config['stop_trigger']
        batchsize = train_iter.batch_size
        len_dataset = len(train_dataset)
        trainer.extend(PolynomialShift('lr', power, stop_trigger, batchsize,
                                       len_dataset),
                       trigger=(1, 'iteration'))

    # Resume
    if args.resume is not None:
        serializers.load_npz(args.resume, trainer)
        print('Resumed from:', args.resume)

    print('==========================================')
    return trainer
# --- Optimizer hook and trainer wiring (fragment; `train_chain`,
# `optimizer`, `train_iter`, `valid_iter` and the hyperparameters are
# defined outside this chunk) ---
# Standard SSD recipe: biases get a 2x gradient scale and no weight decay;
# all other parameters get 5e-4 weight decay.
for param in train_chain.params():
    if param.name == 'b':
        param.update_rule.add_hook(GradientScaling(2))
    else:
        param.update_rule.add_hook(WeightDecay(0.0005))

updater = training.updaters.StandardUpdater(
    train_iter, optimizer, device=gpu_id)
trainer = training.Trainer(
    updater, (training_epoch, 'epoch'), out)

# Step the learning rate down at the configured epochs.
trainer.extend(
    extensions.ExponentialShift('lr', lr_decay_rate, init=initial_lr),
    trigger=triggers.ManualScheduleTrigger(lr_decay_timing, 'epoch'))

# Evaluate detection mAP every epoch (use_07_metric=False: area-based AP).
trainer.extend(
    DetectionVOCEvaluator(
        valid_iter, model, use_07_metric=False,
        label_names=bball_labels),
    trigger=(1, 'epoch'))

trainer.extend(extensions.LogReport(trigger=log_interval))
trainer.extend(extensions.observe_lr(), trigger=log_interval)
trainer.extend(extensions.PrintReport(
    ['epoch', 'iteration', 'lr',
     'main/loss', 'main/loss/loc', 'main/loss/conf',
     'validation/main/map', 'elapsed_time']),
    trigger=log_interval)

if extensions.PlotReport.available():
    # NOTE(review): this statement is truncated in this chunk -- the
    # PlotReport arguments continue outside the visible source.
    trainer.extend(
def main():
    """Train SSD300/SSD512 on VOC2007+2012 trainval; evaluate on VOC2007 test.

    Command-line flags select the model variant, batch size, total
    iterations, LR-drop steps, GPU id, output directory and an optional
    trainer snapshot to resume from.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Training data: VOC2007+2012 trainval with SSD augmentation/encoding.
    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    # Evaluation data: VOC2007 test incl. difficult objects.
    test = VOCBboxDataset(year='2007',
                          split='test',
                          use_difficult=True,
                          return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # SSD recipe: biases get 2x gradient scale and no decay; everything
    # else gets 5e-4 weight decay.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    # 10x LR drop at each --step iteration.
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step, 'iteration'))
    # Evaluate mAP at each LR-drop point and at the final iteration.
    trainer.extend(DetectionVOCEvaluator(test_iter,
                                         model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Full trainer snapshots at evaluation points; bare model only at the end.
    trainer.extend(extensions.snapshot(),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Distributed (ChainerMN) SSD training on the DeepFashion bbox dataset.

    Rank 0 loads and transforms the datasets, which are then scattered to
    all workers; logging/snapshot extensions run on rank 0 only.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--labelnum', type=int, default=50)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--image_label', '-il',
                        help='Path to training image-label list file')
    parser.add_argument('--bbox', help='Path to training bbox list file')
    parser.add_argument('--image_label_test', '-ilt',
                        help='Path to training image-label list file')
    parser.add_argument('--bbox_test', help='Path to training bbox list file')
    parser.add_argument('--image_root', '-TR', default='.',
                        help='Root directory path of image files')
    args = parser.parse_args()

    comm = chainermn.create_communicator('naive')
    if comm.mpi_comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(
            MPI.COMM_WORLD.Get_size()))

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=args.labelnum, pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=args.labelnum, pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    from test_datasets import DeepFashionBboxDataset

    # Only rank 0 reads the data; scatter_dataset distributes shards.
    if comm.rank == 0:
        train = DeepFashionBboxDataset(args.bbox, args.image_label,
                                       args.image_root)
        test = DeepFashionBboxDataset(args.bbox_test, args.image_label_test,
                                      args.image_root)
        train = TransformDataset(
            train, Transform(model.coder, model.insize, model.mean))
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(train_chain)
    # SSD recipe: biases get 2x gradient scale and no decay.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)

    # Multi-node checkpointing: restores automatically if a checkpoint exists.
    checkpoint_interval = (1000, 'iteration')
    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([80000, 100000],
                                                          'iteration'))

    # NOTE(review): the evaluator reports voc_bbox_label_names while the
    # model was built with n_fg_class=args.labelnum (DeepFashion) -- these
    # label sets look inconsistent; confirm against the dataset definition.
    evaluator = DetectionVOCEvaluator(test_iter,
                                      model,
                                      use_07_metric=True,
                                      label_names=voc_bbox_label_names)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=(10000, 'iteration'))

    # Reporting and snapshots only on the root process.
    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map'
        ]),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
            trigger=(120000, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def handler(context):
    """Platform training entry point: train SSD on VOC data fetched via API.

    Loads VOC2007/2012 trainval and VOC2007 test through the dataset API,
    assembles the standard SSD training pipeline, and writes results and the
    final model snapshot to ABEJA_TRAINING_RESULT_DIR.

    NOTE(review): `network_model`, `USE_GPU`, `BATCHSIZE`, `nb_iterations`,
    `log_path` and `ABEJA_TRAINING_RESULT_DIR` are module-level settings
    defined outside this chunk.
    """
    dataset_alias = context.datasets
    trainval_2007_dataset_id = dataset_alias['trainval2007']
    trainval_2012_dataset_id = dataset_alias['trainval2012']
    test_2007_dataset_id = dataset_alias['test2007']
    trainval_2007_dataset = list(
        load_dataset_from_api(trainval_2007_dataset_id))
    trainval_2012_dataset = list(
        load_dataset_from_api(trainval_2012_dataset_id))
    test_2007_dataset = list(load_dataset_from_api(test_2007_dataset_id))

    if network_model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif network_model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if USE_GPU >= 0:
        chainer.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()

    trainval_2007 = DetectionDatasetFromAPI(trainval_2007_dataset)
    trainval_2012 = DetectionDatasetFromAPI(trainval_2012_dataset)
    test_2007 = DetectionDatasetFromAPI(test_2007_dataset,
                                        use_difficult=True,
                                        return_difficult=True)

    # SSD augmentation/encoding over the concatenated trainval sets.
    train = TransformDataset(ConcatenatedDataset(trainval_2007,
                                                 trainval_2012),
                             Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train, BATCHSIZE)

    test_iter = chainer.iterators.SerialIterator(test_2007,
                                                 BATCHSIZE,
                                                 repeat=False,
                                                 shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # SSD recipe: biases get 2x gradient scale and no decay.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=USE_GPU)
    trainer = training.Trainer(updater, (nb_iterations, 'iteration'),
                               out=ABEJA_TRAINING_RESULT_DIR)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([80000, 100000],
                                                          'iteration'))
    trainer.extend(DetectionVOCEvaluator(test_iter,
                                         model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=(10000, 'iteration'))

    log_interval = 100, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)

    print_entries = [
        'iteration', 'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]
    report_entries = [
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]

    # Platform-side metric aggregation and TensorBoard export.
    trainer.extend(Statistics(report_entries,
                              nb_iterations,
                              obs_key='iteration'),
                   trigger=log_interval)
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(print_entries),
                   trigger=log_interval)

    # Save the bare model once, at the final iteration.
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(nb_iterations, 'iteration'))

    trainer.run()
def main():
    """Train a small NALU model on a synthetic multiplication task.

    Trains NaluLayer(2, 1) on LoadData pairs, saves model/optimizer to
    ./results, then reloads the weights into a prediction-mode model and
    prints a few sample predictions.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=0)
    parser.add_argument('--model', '-m', type=str, default=None)
    parser.add_argument('--opt', type=str, default=None)
    parser.add_argument('--epoch', '-e', type=int, default=10)
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--batch', '-b', type=int, default=32)
    parser.add_argument('--noplot', dest='plot', action='store_false',
                        help='Disable PlotReport extension')
    args = parser.parse_args()

    print("Loading datas")
    # Inputs are scaled by max_value; products by max_value**2 (see the
    # rescaling when printing predictions below).
    max_value = 200
    train, validation = LoadData(max_value=max_value, N=20000,
                                 validation_split=True)

    # Set up a neural network to train.
    print("Building model")
    model = NaluLayer(2, 1)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = optimizers.Adam(alpha=args.lr)
    optimizer.setup(model)

    train_iter = iterators.SerialIterator(train, batch_size=args.batch,
                                          shuffle=True)
    test_iter = iterators.SerialIterator(validation, batch_size=args.batch,
                                         repeat=False, shuffle=False)

    # FIX: compare against None with `is not None`, not `!=`.
    if args.model is not None:
        print("loading model from " + args.model)
        serializers.load_npz(args.model, model)
    if args.opt is not None:
        print("loading opt from " + args.opt)
        serializers.load_npz(args.opt, optimizer)

    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out='results')
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(extensions.LogReport(trigger=(10, 'iteration')))

    # Snapshot
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    #serializers.load_npz('./results/snapshot_iter_1407', trainer)

    # Decay learning rate (Adam's `alpha`) once at 75% of training.
    points = [args.epoch * 0.75]
    trainer.extend(extensions.ExponentialShift('alpha', 0.1),
                   trigger=triggers.ManualScheduleTrigger(points, 'epoch'))

    # Save two plot images to the result dir
    if args.plot and extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))

    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss']),
        trigger=(1, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=1))

    # Dump the computation graph rooted at the loss.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Train
    trainer.run()

    # Save results
    modelname = "./results/model"
    print("saving model to " + modelname)
    serializers.save_npz(modelname, model)
    optimizername = "./results/optimizer"
    print("saving optimizer to " + optimizername)
    serializers.save_npz(optimizername, optimizer)

    # Estimate model: rebuild in prediction mode and reload trained weights.
    # FIX: the original had a duplicated assignment (`model = model = ...`).
    model = NaluLayer(2, 1, return_prediction=True)
    weight_dir = "./results/model"
    print("Loading model from " + weight_dir)
    serializers.load_npz(weight_dir, model)

    n_test = 10
    test = LoadData(N=n_test, validation_split=False)
    loss, y = model(test)
    y = cuda.to_cpu(y.data)
    #print(test[0], y[0])
    # Undo the max_value scaling when printing question/answer/prediction.
    for i in range(n_test):
        print('-' * 10)
        print('Q: ', round(test[i, 0] * max_value), 'x',
              round(test[i, 1] * max_value))
        print('A: ', round(test[i, 2] * (max_value**2)))
        print('P: ', round(y[i, 0] * (max_value**2)))
        print('-' * 10)
def main():
    """Train a molecular-pair binary classifier with early stopping.

    Parses the pair datasets with the feature-specific parser selected by
    ``args['feature']``, builds a similarity predictor, trains it with Adam
    under an EarlyStoppingTrigger, registers accuracy/ROC/PRC/F1 evaluators
    for both train and validation splits, and pickles the trained model.
    """
    # Parse the arguments.
    args = parse_arguments()

    if args['label']:
        labels = args['label']
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label(label_list):
        label_arr = np.asarray(label_list, dtype=np.int32)
        return label_arr

    # Apply a preprocessor to the dataset.
    logging.info('Preprocess train dataset and valid dataset...')
    # use `ggnn` for the time being
    preprocessor = preprocess_method_dict['ggnn']()

    # FIX: the second branch must be `elif` -- with two independent `if`s the
    # trailing `else` overwrote the 'molenc' parser with the Mol2Vec one.
    if args['feature'] == 'molenc':
        parser = MolAutoencoderParserForPair(
            preprocessor,
            postprocess_label=postprocess_label,
            labels=labels,
            smiles_cols=['smiles_1', 'smiles_2'])
    elif args['feature'] == 'ssp':
        parser = SSPParserForPair(preprocessor,
                                  postprocess_label=postprocess_label,
                                  labels=labels,
                                  smiles_cols=['smiles_1', 'smiles_2'])
    else:
        parser = Mol2VecParserForPair(preprocessor,
                                      postprocess_label=postprocess_label,
                                      labels=labels,
                                      smiles_cols=['smiles_1', 'smiles_2'])

    train = parser.parse(args['train_datafile'])['dataset']
    valid = parser.parse(args['valid_datafile'])['dataset']

    if args['augment']:
        logging.info('Utilizing data augmentation in train set')
        train = augment_dataset(train)

    num_train = train.get_datasets()[0].shape[0]
    num_valid = valid.get_datasets()[0].shape[0]
    logging.info('Train/test split: {}/{}'.format(num_train, num_valid))

    # Hidden-layer widths come in as a comma-separated string, e.g. "32,16".
    if len(args['net_hidden_dims']):
        net_hidden_dims = tuple([
            int(net_hidden_dim)
            for net_hidden_dim in args['net_hidden_dims'].split(',')
        ])
    else:
        net_hidden_dims = ()

    predictor = set_up_predictor(fp_out_dim=args['fp_out_dim'],
                                 net_hidden_dims=net_hidden_dims,
                                 class_num=class_num,
                                 sim_method=args['sim_method'],
                                 symmetric=args['symmetric'])

    train_iter = SerialIterator(train, args['batchsize'])
    test_iter = SerialIterator(valid, args['batchsize'],
                               repeat=False, shuffle=False)

    metrics_fun = {'accuracy': F.binary_accuracy}
    classifier = Classifier(predictor,
                            lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=metrics_fun,
                            device=args['gpu'])

    # Set up the optimizer.
    optimizer = optimizers.Adam(alpha=args['learning_rate'],
                                weight_decay_rate=args['weight_decay_rate'])
    optimizer.setup(classifier)

    # Optional regularization hooks, each enabled by a positive rate.
    if args['max_norm'] > 0:
        optimizer.add_hook(
            chainer.optimizer.GradientClipping(threshold=args['max_norm']))
    if args['l2_rate'] > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args['l2_rate']))
    if args['l1_rate'] > 0:
        optimizer.add_hook(chainer.optimizer.Lasso(rate=args['l1_rate']))

    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       device=args['gpu'],
                                       converter=concat_mols)

    # Set up the trainer: stop on validation loss plateau, cap at 500 epochs.
    logging.info('Training...')
    early_stop = triggers.EarlyStoppingTrigger(monitor='validation/main/loss',
                                               patients=10,
                                               max_trigger=(500, 'epoch'))
    out = 'output' + '/' + args['out']
    trainer = training.Trainer(updater, stop_trigger=early_stop, out=out)

    trainer.extend(
        E.Evaluator(test_iter,
                    classifier,
                    device=args['gpu'],
                    converter=concat_mols))

    # A non-repeating iterator over the train split for metric evaluation.
    train_eval_iter = SerialIterator(train, args['batchsize'],
                                     repeat=False, shuffle=False)

    # extension name='validation' is already used by `Evaluator`, so the
    # metric evaluators below use their own prefixes (train_*/val_*).
    trainer.extend(
        AccuracyEvaluator(train_eval_iter, classifier, eval_func=predictor,
                          device=args['gpu'], converter=concat_mols,
                          name='train_acc', pos_labels=1, ignore_labels=-1,
                          raise_value_error=False))
    trainer.extend(
        AccuracyEvaluator(test_iter, classifier, eval_func=predictor,
                          device=args['gpu'], converter=concat_mols,
                          name='val_acc', pos_labels=1, ignore_labels=-1))
    trainer.extend(
        ROCAUCEvaluator(train_eval_iter, classifier, eval_func=predictor,
                        device=args['gpu'], converter=concat_mols,
                        name='train_roc', pos_labels=1, ignore_labels=-1,
                        raise_value_error=False))
    trainer.extend(
        ROCAUCEvaluator(test_iter, classifier, eval_func=predictor,
                        device=args['gpu'], converter=concat_mols,
                        name='val_roc', pos_labels=1, ignore_labels=-1))
    trainer.extend(
        PRCAUCEvaluator(train_eval_iter, classifier, eval_func=predictor,
                        device=args['gpu'], converter=concat_mols,
                        name='train_prc', pos_labels=1, ignore_labels=-1,
                        raise_value_error=False))
    trainer.extend(
        PRCAUCEvaluator(test_iter, classifier, eval_func=predictor,
                        device=args['gpu'], converter=concat_mols,
                        name='val_prc', pos_labels=1, ignore_labels=-1))
    trainer.extend(
        F1Evaluator(train_eval_iter, classifier, eval_func=predictor,
                    device=args['gpu'], converter=concat_mols,
                    name='train_f', pos_labels=1, ignore_labels=-1,
                    raise_value_error=False))
    trainer.extend(
        F1Evaluator(test_iter, classifier, eval_func=predictor,
                    device=args['gpu'], converter=concat_mols,
                    name='val_f', pos_labels=1, ignore_labels=-1))

    # Apply one of three fixed epoch schedules for Adam alpha decay.
    if args['exp_shift_strategy'] == 1:
        trainer.extend(E.ExponentialShift('alpha', args['exp_shift_rate']),
                       trigger=triggers.ManualScheduleTrigger(
                           [10, 20, 30, 40, 50, 60], 'epoch'))
    elif args['exp_shift_strategy'] == 2:
        trainer.extend(E.ExponentialShift('alpha', args['exp_shift_rate']),
                       trigger=triggers.ManualScheduleTrigger(
                           [5, 10, 15, 20, 25, 30], 'epoch'))
    elif args['exp_shift_strategy'] == 3:
        trainer.extend(E.ExponentialShift('alpha', args['exp_shift_rate']),
                       trigger=triggers.ManualScheduleTrigger(
                           [5, 10, 15, 20, 25, 30, 40, 50, 60, 70], 'epoch'))
    else:
        raise ValueError('No such strategy to adapt learning rate')

    # observation of learning rate
    trainer.extend(E.observe_lr(), trigger=(1, 'iteration'))

    entries = [
        'epoch',
        'main/loss', 'train_acc/main/accuracy', 'train_roc/main/roc_auc',
        'train_prc/main/prc_auc',
        'train_f/main/f1',
        'validation/main/loss', 'val_acc/main/accuracy',
        'val_roc/main/roc_auc', 'val_prc/main/prc_auc',
        'val_f/main/f1',
        'lr',
        'elapsed_time'
    ]
    trainer.extend(E.PrintReport(entries=entries))
    # change from 10 to 2 on Mar. 1 2019
    trainer.extend(E.snapshot(), trigger=(2, 'epoch'))
    trainer.extend(E.LogReport())
    trainer.extend(E.ProgressBar())
    trainer.extend(
        E.PlotReport(['main/loss', 'validation/main/loss'], 'epoch',
                     file_name='loss.png'))
    trainer.extend(
        E.PlotReport(['train_acc/main/accuracy', 'val_acc/main/accuracy'],
                     'epoch', file_name='accuracy.png'))

    if args['resume']:
        resume_path = os.path.join(out, args['resume'])
        logging.info(
            'Resume training according to snapshot in {}'.format(resume_path))
        chainer.serializers.load_npz(resume_path, trainer)

    trainer.run()

    # Save the regressor's parameters.
    model_path = os.path.join(out, args['model_filename'])
    logging.info('Saving the trained models to {}...'.format(model_path))
    classifier.save_pickle(model_path, protocol=args['protocol'])
def create_extension(trainer, test_iter, model, config, devices=None):
    """Register trainer extensions described by a config mapping.

    Each key of ``config`` names an extension type; its value holds that
    extension's settings (trigger, args, ...).  Returns the trainer with
    all configured extensions attached.
    """
    for key, ext in config.items():
        if key == "Evaluator":
            # Evaluator class is resolved dynamically from the configured
            # module/name; 'voc' label_names expand to the VOC label list.
            cl = get_class(ext['module'])
            Evaluator = getattr(cl, ext['name'])
            trigger = parse_trigger(ext['trigger'])
            args = parse_dict(ext, 'args', {})
            if parse_dict(args, 'label_names', 'voc') == 'voc':
                args['label_names'] = voc_bbox_label_names
            trainer.extend(Evaluator(test_iter, model, **args),
                           trigger=trigger)
        elif key == "dump_graph":
            cl = getattr(extensions, key)
            trainer.extend(cl(ext['name']))
        elif key == 'snapshot':
            cl = getattr(extensions, key)
            trigger = parse_trigger(ext['trigger'])
            trainer.extend(cl(), trigger=trigger)
        elif key == 'snapshot_object':
            cl = getattr(extensions, key)
            trigger = parse_trigger(ext['trigger'])
            args = parse_dict(ext, 'args', {})
            if args:
                # method == 'best': keep a snapshot whenever the monitored
                # value (args['name']) reaches a new maximum.
                # FIX: use .get() -- args without a 'method' key used to
                # raise KeyError here.
                if args.get('method') == 'best':
                    trigger = triggers.MaxValueTrigger(args['name'], trigger)
            trainer.extend(cl(model, 'yolov2_{.updater.iteration}'),
                           trigger=trigger)
        elif key == 'LogReport':
            cl = getattr(extensions, key)
            trigger = parse_trigger(ext['trigger'])
            trainer.extend(cl(trigger=trigger))
        elif key == "PrintReport":
            # Report entries come as a single space-separated string.
            cl = getattr(extensions, key)
            report_list = ext['name'].split(' ')
            trigger = parse_trigger(ext['trigger'])
            trainer.extend(cl(report_list), trigger=trigger)
        elif key == "ProgressBar":
            cl = getattr(extensions, key)
            trainer.extend(cl(update_interval=ext['update_interval']))
        elif key == 'observe_lr':
            cl = getattr(extensions, key)
            trigger = parse_trigger(ext['trigger'])
            trainer.extend(cl(), trigger=trigger)
        elif key == "PolynomialShift":
            cl = getattr(lr_utils, key)
            # NOTE(review): `trigger` is parsed but never passed to
            # trainer.extend below -- the extension runs with its default
            # trigger.  Looks unintentional; confirm before changing.
            trigger = parse_trigger(ext['trigger'])
            len_dataset = len(trainer.updater.get_iterator('main').dataset)
            batchsize = trainer.updater.get_iterator('main').batch_size
            args = parse_dict(ext, 'args', {})
            args.update({
                'len_dataset': len_dataset,
                'batchsize': batchsize,
                'stop_trigger': trainer.stop_trigger
            })
            trainer.extend(cl(**args))
        elif key == "DarknetLRScheduler":
            cl = getattr(lr_utils, key)
            args = parse_dict(ext, 'args', {})
            args['step_trigger'] = [int(num) for num in args['step_trigger']]
            trainer.extend(cl(**args))
        elif key == "ExponentialShift":
            cl = getattr(extensions, key)
            attr = ext['attr']
            rate = ext['rate']
            name = ext['name']
            numbers = [int(num) for num in ext['numbers']]
            trainer.extend(cl(attr, rate),
                           trigger=triggers.ManualScheduleTrigger(
                               numbers, name))
    return trainer