def main():
    """Train a ConvNet classifier and keep the best validation snapshots.

    All hyper-parameters come from the command line via ``opts.parse``.
    Results (log, plots, snapshots) are written to ``opt.save``.  On failure
    the partially written result directory is removed and the exception is
    re-raised so the process exits with a non-zero status.
    """
    opt = opts.parse()
    model = net.ConvNet(opt.n_classes, opt.BC, opt.nobias, opt.dropout_ratio)
    if opt.gpu > -1:
        chainer.cuda.get_device_from_id(opt.gpu).use()
        model.to_gpu()

    optimizer = optimizers.NesterovAG(lr=opt.LR, momentum=opt.momentum)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(opt.weight_decay))

    train_iter, val_iter = dataset.setup(opt)
    updater = training.StandardUpdater(train_iter, optimizer, device=opt.gpu)

    # Trainer
    trainer = training.Trainer(updater, (opt.n_epochs, 'epoch'), opt.save)
    # Multiply the learning rate by 0.1 at each epoch listed in opt.schedule.
    trainer.extend(extensions.ExponentialShift('lr', 0.1, opt.LR),
                   trigger=ManualScheduleTrigger(opt.schedule, 'epoch'))
    trainer.extend(extensions.Evaluator(val_iter, model, device=opt.gpu),
                   trigger=(1, 'epoch'))
    trainer.extend(extensions.dump_graph('main/loss'))
    # Every 5 epochs, snapshot the full trainer state and the bare model at
    # the best validation loss / accuracy seen so far.
    trainer.extend(extensions.snapshot(filename='min_loss'),
                   trigger=MinValueTrigger(key='validation/main/loss',
                                           trigger=(5, 'epoch')))
    trainer.extend(extensions.snapshot(filename='max_accuracy'),
                   trigger=MaxValueTrigger(key='validation/main/accuracy',
                                           trigger=(5, 'epoch')))
    trainer.extend(extensions.snapshot_object(model, 'min_loss_model'),
                   trigger=MinValueTrigger(key='validation/main/loss',
                                           trigger=(5, 'epoch')))
    trainer.extend(extensions.snapshot_object(model, 'max_accuracy_model'),
                   trigger=MaxValueTrigger(key='validation/main/accuracy',
                                           trigger=(5, 'epoch')))
    trainer.extend(extensions.observe_lr())
    trainer.extend(extensions.LogReport())
    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(
            ['main/loss', 'validation/main/loss'],
            'epoch', file_name='loss.png'))
        trainer.extend(extensions.PlotReport(
            ['main/accuracy', 'validation/main/accuracy'],
            'epoch', file_name='accuracy.png'))
        trainer.extend(extensions.PlotReport(
            ['lr'], 'epoch', file_name='learning_rate.png'))
    trainer.extend(extensions.PrintReport(
        ['elapsed_time', 'epoch', 'iteration', 'lr', 'main/loss',
         'main/accuracy', 'validation/main/loss',
         'validation/main/accuracy']))
    trainer.extend(extensions.ProgressBar(update_interval=25))

    if opt.resume and os.path.exists(opt.resume):
        chainer.serializers.load_npz(opt.resume, trainer)

    # Run the training
    try:
        trainer.run()
    except Exception:
        # Clean up the incomplete result directory, then re-raise.  The
        # original code swallowed the exception (the process exited with
        # status 0), which hid training failures from callers.
        import shutil
        import traceback
        print('\nerror message')
        print(traceback.format_exc())
        shutil.rmtree(opt.save)
        raise
def main():
    """Single-GPU FCIS training on SBD (ChainerCV example)."""
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--lr', '-l', type=float, default=0.0005)
    parser.add_argument('--lr-cooldown-factor', '-lcf', type=float,
                        default=0.1)
    parser.add_argument('--epoch', '-e', type=int, default=42)
    parser.add_argument('--cooldown-epoch', '-ce', type=int, default=28)
    args = parser.parse_args()

    np.random.seed(args.seed)

    # Datasets: SBD train split for training, val split for evaluation.
    train_dataset = SBDInstanceSegmentationDataset(split='train')
    test_dataset = SBDInstanceSegmentationDataset(split='val')

    # Model: FCIS with a ResNet-101 backbone pre-trained on ImageNet.
    fcis = FCISResNet101(
        n_fg_class=len(sbd_instance_segmentation_label_names),
        pretrained_model='imagenet', iter2=False)
    fcis.use_preset('evaluate')
    model = FCISTrainChain(fcis)

    # Move to GPU when one was requested.
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Optimizer: momentum SGD with a 3x gradient boost on the head conv
    # and standard weight decay.
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Keep batch-normalization parameters and the first extractor stages
    # frozen during fine-tuning.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    train_dataset = TransformDataset(train_dataset, Transform(model.fcis))

    # Iterators (batch size 1, as required by FCIS).
    train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=1)
    test_iter = chainer.iterators.SerialIterator(
        test_dataset, batch_size=1, repeat=False, shuffle=False)

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=args.gpu)
    trainer = chainer.training.Trainer(
        updater, (args.epoch, 'epoch'), out=args.out)

    # Learning-rate cooldown at the configured epoch.
    trainer.extend(
        chainer.training.extensions.ExponentialShift(
            'lr', args.lr_cooldown_factor, init=args.lr),
        trigger=(args.cooldown_epoch, 'epoch'))

    # Reporting intervals.
    log_interval = (100, 'iteration')
    plot_interval = (3000, 'iteration')
    print_interval = (20, 'iteration')

    trainer.extend(
        extensions.snapshot_object(model.fcis, filename='snapshot_model.npz'),
        trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(
        extensions.LogReport(log_name='log.json', trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration', 'epoch', 'elapsed_time', 'lr',
        'main/loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/roi_mask_loss',
        'validation/main/map',
    ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss'], file_name='loss.png',
                                  trigger=plot_interval),
            trigger=plot_interval)

    # Evaluate mAP right after the cooldown and at the end of training.
    trainer.extend(
        InstanceSegmentationVOCEvaluator(
            test_iter, model.fcis,
            iou_thresh=0.5, use_07_metric=True,
            label_names=sbd_instance_segmentation_label_names),
        trigger=ManualScheduleTrigger([
            len(train_dataset) * args.cooldown_epoch,
            len(train_dataset) * args.epoch
        ], 'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train a Faster R-CNN (VGG16) defect detector on GPU 0."""
    # BUG FIX: ('loop') is just the string 'loop'; the evaluator expects an
    # iterable of class names, so a one-element tuple is required.
    bbox_label_names = ('loop',)

    n_itrs = 70000   # total training iterations
    n_step = 50000   # iteration at which the learning rate is decayed
    np.random.seed(0)

    train_data = DefectDetectionDataset(split='train')
    test_data = DefectDetectionDataset(split='test')

    proposal_params = {'min_size': 8}
    faster_rcnn = FasterRCNNVGG16(n_fg_class=1,
                                  pretrained_model='imagenet',
                                  ratios=[0.5, 1, 2],
                                  anchor_scales=[1, 4, 8, 16],
                                  min_size=512,
                                  max_size=1024,
                                  proposal_creator_params=proposal_params)
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    chainer.cuda.get_device_from_id(0).use()
    model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=1e-3, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    train_data = TransformDataset(train_data, Transform(faster_rcnn))
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=0)
    trainer = training.Trainer(
        updater, (n_itrs, 'iteration'), out='result')

    # Snapshot five times over the run.  BUG FIX: n_itrs / 5 is a float under
    # Python 3; trigger intervals must be integers.
    trainer.extend(
        extensions.snapshot_object(model.faster_rcnn,
                                   'snapshot_model_{.updater.iteration}.npz'),
        trigger=(n_itrs // 5, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(n_step, 'iteration'))

    log_interval = (50, 'iteration')
    plot_interval = (100, 'iteration')
    print_interval = (20, 'iteration')

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr',
         'main/loss',
         'main/roi_loc_loss',
         'main/roi_cls_loss',
         'main/rpn_loc_loss',
         'main/rpn_cls_loss',
         'validation/main/map',
         ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=5))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss'],
                file_name='loss.png', trigger=plot_interval
            ),
            trigger=plot_interval
        )

    # Evaluate at a fixed schedule of milestones.
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model.faster_rcnn, use_07_metric=True,
            label_names=bbox_label_names),
        trigger=ManualScheduleTrigger(
            [100, 500, 1000, 5000, 10000, 20000, 40000, 60000,
             n_step, n_itrs], 'iteration'))
    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Distributed (ChainerMN) classifier training driven by a YAML config."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_path', type=str, default='configs/base.yml',
                        help='path to config file')
    parser.add_argument('--results_dir', type=str, default='./result/',
                        help='directory to save the results to')
    parser.add_argument('--resume', type=str, default='',
                        help='path to the snapshot')
    parser.add_argument('--process_num', type=int, default=0)
    parser.add_argument('--seed', type=int, default=42)
    args = parser.parse_args()

    # BUG FIX: close the config file promptly instead of leaking the handle
    # (the original passed an anonymous open() straight into yaml.load).
    with open(args.config_path) as f:
        config = yaml_utils.Config(yaml.load(f, Loader=yaml.SafeLoader))
    pattern = "-".join([
        config.pattern, config.models['classifier']['name'],
        config.dataset['dataset_name']
    ])

    comm = chainermn.create_communicator()
    device = comm.intra_rank
    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))
        print('Num Minibatch-size: {}'.format(config.batchsize))
        print('Num Epoch: {}'.format(config.epoch))
        print('==========================================')

    # Model
    classifier = load_models(config.models['classifier'])
    if args.resume:
        print("Resume training with snapshot:{}".format(args.resume))
        chainer.serializers.load_npz(args.resume, classifier)
    chainer.cuda.get_device_from_id(device).use()
    classifier.to_gpu()

    # Optimizer
    opt = make_optimizer(classifier, comm, config)
    opt.add_hook(chainer.optimizer.WeightDecay(5e-4))

    # Dataset: rank 0 loads and splits; every rank receives its own shard.
    if comm.rank == 0:
        dataset = yaml_utils.load_dataset(config)
        first_size = int(len(dataset) * config.train_val_split_ratio)
        train, val = chainer.datasets.split_dataset_random(
            dataset, first_size, seed=args.seed)
    else:
        # Import the dataset module for its side effects so scattered
        # examples can be reconstructed on non-zero ranks.
        yaml_utils.load_module(config.dataset['dataset_func'],
                               config.dataset['dataset_name'])
        train, val = None, None
    train = chainermn.scatter_dataset(train, comm)
    val = chainermn.scatter_dataset(val, comm)

    # Iterator
    train_iterator = chainer.iterators.SerialIterator(train, config.batchsize)
    val_iterator = chainer.iterators.SerialIterator(
        val, config.batchsize, repeat=False, shuffle=False)

    kwargs = config.updater['args'] if 'args' in config.updater else {}
    kwargs.update({
        'classifier': classifier,
        'iterator': train_iterator,
        'optimizer': opt,
        'device': device,
    })

    # Updater (class chosen by the config)
    updater = yaml_utils.load_updater_class(config)
    updater = updater(**kwargs)
    out = args.results_dir + '/' + pattern
    if comm.rank == 0:
        create_result_dir(out, args.config_path, config)

    # Trainer
    trainer = training.Trainer(updater, (config.epoch, 'epoch'), out=out)

    # Evaluator (aggregated across all ranks)
    evaluator = ClassifierEvaluator(val_iterator, classifier, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Learning Rate Schedule (fixed): decay 10x at 30%, 60% and 80%.
    schedule = [config.epoch * 0.3, config.epoch * 0.6, config.epoch * 0.8]
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=ManualScheduleTrigger(schedule, 'epoch'))

    report_keys = [
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy', 'elapsed_time'
    ]
    if comm.rank == 0:
        # Set up logging; the best-accuracy model is snapshotted.
        trainer.extend(extensions.snapshot_object(
            classifier, 'classifier{}.npz'.format(args.process_num)),
            trigger=MaxValueTrigger('validation/main/accuracy'))
        trainer.extend(
            extensions.LogReport(keys=report_keys,
                                 trigger=(config.display_interval, 'epoch')))
        trainer.extend(extensions.PrintReport(report_keys),
                       trigger=(config.display_interval, 'epoch'))
        trainer.extend(
            extensions.ProgressBar(
                update_interval=config.progressbar_interval))

    # Run the training
    trainer.run()
def main():
    """Train Mask R-CNN on COCO 2017 with a selectable backbone."""
    args = parse()
    np.random.seed(args.seed)
    print('arguments: ', args)

    # Model setup
    if args.dataset == 'coco2017':
        train_data = COCODataset()
        test_data = COCODataset(json_file='instances_val2017.json',
                                name='val2017', id_list_file='val2017.txt')
    if args.extractor == 'vgg16':
        mask_rcnn = MaskRCNNVGG16(n_fg_class=80,
                                  pretrained_model=args.pretrained,
                                  roi_size=args.roi_size,
                                  roi_align=args.roialign)
    elif args.extractor == 'resnet50':
        mask_rcnn = MaskRCNNResNet(n_fg_class=80,
                                   pretrained_model=args.pretrained,
                                   roi_size=args.roi_size,
                                   n_layers=50,
                                   roi_align=args.roialign)
    elif args.extractor == 'resnet101':
        mask_rcnn = MaskRCNNResNet(n_fg_class=80,
                                   pretrained_model=args.pretrained,
                                   roi_size=args.roi_size,
                                   n_layers=101,
                                   roi_align=args.roialign)
    else:
        # Fail fast instead of hitting a NameError on `mask_rcnn` below.
        raise ValueError('unsupported extractor: {}'.format(args.extractor))
    mask_rcnn.use_preset('evaluate')
    model = MaskRCNNTrainChain(mask_rcnn, gamma=args.gamma,
                               roi_size=args.roi_size)

    # Trainer setup
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0001))

    train_data = TransformDataset(train_data, Transform(mask_rcnn))
    test_data = TransformDataset(test_data, Transform(mask_rcnn))
    train_iter = chainer.iterators.SerialIterator(train_data, batch_size=1)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)

    updater = SubDivisionUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=args.out)

    # Extensions
    trainer.extend(
        extensions.snapshot_object(model.mask_rcnn, 'snapshot_model.npz'),
        trigger=(args.snapshot, 'iteration'))
    # Warm-up: raise lr 10x once at lr_initialchange, then decay 10x at
    # lr_step.
    trainer.extend(extensions.ExponentialShift('lr', 10),
                   trigger=ManualScheduleTrigger([args.lr_initialchange],
                                                 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.lr_step, 'iteration'))

    if args.resume is not None:
        chainer.serializers.load_npz(args.resume, model.mask_rcnn)

    log_interval = 40, 'iteration'
    print_interval = 40, 'iteration'

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu),
                   trigger=(args.validation, 'iteration'))
    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration', 'epoch', 'elapsed_time', 'lr',
        'main/loss',
        'main/avg_loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/roi_mask_loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
        'validation/main/loss',
    ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=1000))
    trainer.extend(extensions.dump_graph('main/loss'))

    try:
        trainer.run()
    except Exception:
        # BUG FIX: the bare `except:` also swallowed SystemExit and
        # KeyboardInterrupt; report real errors only (behaviour of
        # continuing after reporting is preserved).
        traceback.print_exc()
def main():
    """Multi-GPU (ChainerMN) FCIS training on SBD."""
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument(
        '--lr', '-l', type=float, default=0.0005,
        help='Default value is for 1 GPU.\n'
        'The learning rate will be multiplied by the number of gpu')
    parser.add_argument('--lr-cooldown-factor', '-lcf', type=float,
                        default=0.1)
    parser.add_argument('--epoch', '-e', type=int, default=42)
    # BUG FIX: `type=list` split the argument string into characters
    # (e.g. "28" -> ['2', '8']); accept one or more integer epochs instead.
    parser.add_argument('--cooldown-epoch', '-ce', type=int, nargs='+',
                        default=[28, 31])
    args = parser.parse_args()

    # chainermn: one process per GPU, driving the device of its intra rank
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    np.random.seed(args.seed)

    # model
    fcis = FCISPSROIAlignResNet101(
        n_fg_class=len(sbd_instance_segmentation_label_names),
        pretrained_model='imagenet', iter2=False)
    fcis.use_preset('evaluate')
    model = FCISTrainChain(fcis)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # dataset: rank 0 decides the shuffled order; each rank gets a slice
    train_dataset = TransformDataset(
        SBDInstanceSegmentationDataset(split='train'),
        ('img', 'mask', 'label', 'bbox', 'scale'),
        Transform(model.fcis))
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=1)
    if comm.rank == 0:
        test_dataset = SBDInstanceSegmentationDataset(split='val')
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, batch_size=1, repeat=False, shuffle=False)

    # optimizer: the lr scales linearly with the number of workers
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=args.lr * comm.size, momentum=0.9),
        comm)
    optimizer.setup(model)
    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Keep batch-normalization parameters and the early extractor stages
    # frozen during fine-tuning.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=device)
    trainer = chainer.training.Trainer(updater, (args.epoch, 'epoch'),
                                       out=args.out)

    # lr scheduler
    trainer.extend(chainer.training.extensions.ExponentialShift(
        'lr', args.lr_cooldown_factor, init=args.lr * comm.size),
        trigger=ManualScheduleTrigger(args.cooldown_epoch, 'epoch'))

    if comm.rank == 0:
        # Reporting and evaluation happen on rank 0 only.
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        model_name = model.fcis.__class__.__name__
        trainer.extend(extensions.snapshot_object(
            model.fcis,
            filename='%s_model_iter_{.updater.iteration}.npz' % model_name),
            trigger=(1, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map',
        ]), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(['main/loss'], file_name='loss.png',
                                      trigger=plot_interval),
                trigger=plot_interval)

        trainer.extend(InstanceSegmentationVOCEvaluator(
            test_iter, model.fcis,
            iou_thresh=0.5, use_07_metric=True,
            label_names=sbd_instance_segmentation_label_names),
            trigger=ManualScheduleTrigger(args.cooldown_epoch, 'epoch'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Distributed (ChainerMN) FCIS training on COCO."""
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument(
        '--lr', '-l', type=float, default=0.0005,
        help='Default value is for 1 GPU.\n'
        'The learning rate will be multiplied by the number of gpu')
    parser.add_argument('--no-ohem', action='store_true')
    args = parser.parse_args()

    # One worker per GPU; each process drives the device of its intra rank.
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    np.random.seed(args.seed)

    # Region-proposal settings used for COCO training.
    proposal_creator_params = {
        'nms_thresh': 0.7,
        'n_train_pre_nms': 12000,
        'n_train_post_nms': 2000,
        'n_test_pre_nms': 6000,
        'n_test_post_nms': 1000,
        'force_cpu_nms': False,
        'min_size': 0
    }
    fcis = FCISPSROIAlignResNet101(
        n_fg_class=len(coco_instance_segmentation_label_names),
        min_size=800, max_size=1333,
        anchor_scales=(2, 4, 8, 16, 32),
        pretrained_model='imagenet', iter2=False,
        proposal_creator_params=proposal_creator_params)
    fcis.use_preset('coco_evaluate')
    if args.no_ohem:
        # Plain sampling of 128 RoIs instead of online hard example mining.
        model = FCISTrainChain(
            fcis, n_ohem_sample=None,
            proposal_target_creator=ProposalTargetCreator(n_sample=128))
    else:
        model = FCISTrainChain(fcis)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # Training data: COCO train + valminusminival, transformed for FCIS.
    train_dataset = TransformDataset(
        ConcatenatedDataset(
            COCOInstanceSegmentationDataset(split='train'),
            COCOInstanceSegmentationDataset(split='valminusminival')),
        ('img', 'mask', 'label', 'bbox', 'scale'),
        Transform(model.fcis))
    test_dataset = COCOInstanceSegmentationDataset(
        split='minival', use_crowded=True,
        return_crowded=True, return_area=True)
    # Rank 0 owns the shuffled index order; every rank trains on its shard.
    indices = np.arange(len(train_dataset)) if comm.rank == 0 else None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=1)
    if comm.rank == 0:
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, batch_size=1, repeat=False, shuffle=False)

    # Optimizer: linear lr scaling with the number of workers.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=args.lr * comm.size, momentum=0.9),
        comm)
    optimizer.setup(model)
    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Batch-normalization parameters and the early extractor stages stay
    # fixed during fine-tuning.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    converter = functools.partial(
        concat_examples,
        padding=0,
        # img, masks, labels, bboxes, scales
        indices_concat=[0, 1, 2, 4],  # img, masks, labels, _, scales
        indices_to_device=[0],  # img
    )

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    trainer = chainer.training.Trainer(updater, (18, 'epoch'), out=args.out)

    # Decay the lr 10x at epochs 12 and 15.
    trainer.extend(
        chainer.training.extensions.ExponentialShift(
            'lr', 0.1, init=args.lr * comm.size),
        trigger=ManualScheduleTrigger([12, 15], 'epoch'))

    if comm.rank == 0:
        # Reporting and evaluation happen on rank 0 only.
        log_interval = (100, 'iteration')
        plot_interval = (3000, 'iteration')
        print_interval = (10, 'iteration')

        model_name = model.fcis.__class__.__name__
        trainer.extend(
            chainer.training.extensions.snapshot_object(
                model.fcis,
                savefun=chainer.serializers.save_npz,
                filename='%s_model_iter_{.updater.iteration}.npz'
                % model_name),
            trigger=(1, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        report_items = [
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map/iou=0.50:0.95/area=all/max_dets=100',
        ]
        trainer.extend(extensions.PrintReport(report_items),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(['main/loss'], file_name='loss.png',
                                      trigger=plot_interval),
                trigger=plot_interval)

        # COCO-style evaluation right after each lr decay milestone.
        trainer.extend(
            InstanceSegmentationCOCOEvaluator(
                test_iter, model.fcis,
                label_names=coco_instance_segmentation_label_names),
            trigger=ManualScheduleTrigger(
                [len(train_dataset) * 12, len(train_dataset) * 15],
                'iteration'))
        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Resume/fine-tune a stacked fMRI auto-encoder from a saved snapshot.

    The model, dataset, optimizer, hooks and schedule are all driven by the
    YAML config written by the original training run.  After restoring the
    trainer state, the PCA head is rebuilt from pre-computed IPCA parameters
    and frozen, and training continues against the reconstruction loss with
    the encoder frozen and the decoder released.
    """
    experiment_name = "Stacked_16_16_16_16"
    snapshot_name = "snapshot_iter_27215"
    config_path = "/efs/fMRI_AE/{}/log/config.yml".format(experiment_name)
    config = load_config(config_path)
    # Resolve the mask file path and crop from the experiment metadata so the
    # mask loader can be invoked with a single **params dict below.
    config["additional information"]["mask"]["loader"]["params"][
        "mask_path"] = path.join(
            config["additional information"]["mask"]["directory"],
            config["additional information"]["mask"]["file"])
    config["additional information"]["mask"]["loader"]["params"][
        "crop"] = config["additional information"]["crop"]
    snapshot_path = "/efs/fMRI_AE/{}/model/{}".format(experiment_name,
                                                      snapshot_name)
    # print("configured as follows:")
    # print(yaml_dump(config))
    # Interactive confirmation gate: anything other than y/Y or n/N re-asks.
    while True:
        s = input("ok? (y/n):")
        if s == 'y' or s == 'Y':
            log_config(config, "training start")
            break
        elif s == 'n' or s == 'N':
            destroy_config(config)
            exit(1)
    try:
        try:
            print("mask loading...")
            # The mask loader is configured by module/package/function name
            # and imported dynamically.
            load_mask_module = import_module(
                config["additional information"]["mask"]["loader"]["module"],
                config["additional information"]["mask"]["loader"]["package"])
            load_mask = getattr(
                load_mask_module,
                config["additional information"]["mask"]["loader"]["function"])
            mask = load_mask(
                **config["additional information"]["mask"]["loader"]["params"])
            print("done.")
            print("mask.shape: {}".format(mask.shape))
        except FileNotFoundError as e:
            # NOTE(review): this re-raise is a no-op placeholder; the outer
            # handler logs the termination either way.
            raise e
        model_module = import_module(config["model"]["module"],
                                     config["model"]["package"])
        Model = getattr(model_module, config["model"]["class"])
        model = Model(mask=mask, **config["model"]["params"])
        # Optional warm start from a fine-tune checkpoint (strict=False, so
        # a partial parameter match is accepted).
        finetune_config = config["additional information"]["finetune"]
        if finetune_config is not None:
            load_npz(path.join(finetune_config["directory"],
                               finetune_config["file"]),
                     model, strict=False)
        # Use GPU 0 when available; fall back to CPU on RuntimeError.
        try:
            chainer.cuda.get_device_from_id(0).use()
            gpu = 0
            print("transferring model to GPU...")
            model.to_gpu(gpu)
            print("GPU enabled")
        except RuntimeError:
            gpu = -1
            print("GPU disabled")
        # Dataset class is resolved dynamically from the config.
        dataset_module = import_module(config["dataset"]["module"],
                                       config["dataset"]["package"])
        Dataset = getattr(dataset_module, config["dataset"]["class"])
        train_dataset = Dataset(**config["dataset"]["train"]["params"])
        valid_dataset = Dataset(**config["dataset"]["valid"]["params"])
        train_iterator = Iterator(train_dataset, config["batch"]["train"],
                                  True, True)
        valid_iterator = Iterator(valid_dataset, config["batch"]["valid"],
                                  False, False)
        Optimizer = getattr(chainer.optimizers, config["optimizer"]["class"])
        optimizer = Optimizer(**config["optimizer"]["params"])
        optimizer.setup(model)
        # Optimizer hooks (e.g. weight decay) are also config-driven.
        for hook_config in config["optimizer"]["hook"]:
            hook_module = import_module(hook_config["module"],
                                        hook_config["package"])
            Hook = getattr(hook_module, hook_config["class"])
            hook = Hook(**hook_config["params"])
            optimizer.add_hook(hook)
        updater = Updater(train_iterator, optimizer, device=gpu)
        trainer = Trainer(updater, **config["trainer"]["params"])
        trainer.extend(snapshot(),
                       trigger=config["trainer"]["snapshot_interval"])
        trainer.extend(snapshot_object(model,
                                       "model_iter_{.updater.iteration}"),
                       trigger=config["trainer"]["model_interval"])
        trainer.extend(observe_lr(), trigger=config["trainer"]["log_interval"])
        trainer.extend(
            LogReport([
                "epoch", "iteration", "main/loss", "main/pca_loss",
                "main/reconstruction_loss", "validation/main/loss"
            ],
                      trigger=config["trainer"]["log_interval"]))
        trainer.extend(Evaluator(valid_iterator, model, device=gpu),
                       trigger=config["trainer"]["eval_interval"])
        trainer.extend(PrintReport([
            "epoch", "iteration", "main/loss", "main/pca_loss",
            "main/reconstruction_loss", "validation/main/loss"
        ]),
                       trigger=config["trainer"]["log_interval"])
        trainer.extend(ProgressBar(update_interval=1))
        # Optional schedule: at each configured interval, call the listed
        # methods on the optimizer target.  `funcs` is bound as a default
        # argument to avoid the late-binding closure pitfall.
        if "schedule" in config["additional information"].keys():
            for i, interval_funcs in enumerate(
                    config["additional information"]["schedule"].items()):
                interval, funcs = interval_funcs
                f = lambda trainer, funcs=funcs: [
                    trainer.updater.get_optimizer('main').target.
                    __getattribute__(func["function"])(*func["params"])
                    for func in funcs
                ]
                trainer.extend(f,
                               name="schedule_{}".format(i),
                               trigger=ManualScheduleTrigger(*interval))
        # Restore the full trainer state from the previous run's snapshot.
        load_npz(snapshot_path, trainer)
        target = trainer.updater.get_optimizer("main").target
        # Rebuild the PCA head and load the pre-computed IPCA parameters,
        # then freeze it (presumably components/mean from a prior feature
        # run — confirm the .npz layout against the producing script).
        target.reset_pca()
        target.attach_pca()
        ipca_param = np.load(
            "/efs/fMRI_AE/Stacked_8_8_8_8_feature/ipca_mean_7920_components_990_7920.npz"
        )
        target.pca.W = chainer.Parameter(ipca_param["components"])
        target.pca.bias = chainer.Parameter(ipca_param["mean"])
        target.pca.disable_update()
        target.pca.to_gpu(gpu)
        # Train the decoder only, against the reconstruction loss.
        target.detach_pca_loss()
        target.attach_reconstruction_loss()
        target.release_decoder()
        target.freeze_encoder()
        trainer.run()
        log_config(config, "succeeded")
    except Exception as e:
        log_config(config, "unintentional termination")
        raise e
def main():
    """Train Faster R-CNN (VGG16) as a face detector on WIDER FACE."""
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    parser.add_argument('--train_data_dir', '-t', default=WIDER_TRAIN_DIR,
                        help='Training dataset (WIDER_train)')
    parser.add_argument('--train_annotation', '-ta',
                        default=WIDER_TRAIN_ANNOTATION_MAT,
                        help='Annotation file (.mat) for training dataset')
    parser.add_argument('--val_data_dir', '-v', default=WIDER_VAL_DIR,
                        help='Validation dataset (WIDER_train)')
    parser.add_argument('--val_annotation', '-va',
                        default=WIDER_VAL_ANNOTATION_MAT,
                        help='Annotation file (.mat) for validation dataset')
    args = parser.parse_args()

    np.random.seed(args.seed)

    # for logging pocessed files
    logger = logging.getLogger('logger')
    logger.setLevel(logging.DEBUG)
    handler = logging.FileHandler(filename='filelog.log')
    handler.setLevel(logging.DEBUG)
    logger.addHandler(handler)

    # Images listed in the blacklist file are excluded from training.
    blacklist = []
    with open(BLACKLIST_FILE, 'r') as f:
        for line in f:
            l = line.strip()
            if l:
                blacklist.append(l)

    train_data = WIDERFACEDataset(args.train_data_dir, args.train_annotation,
                                  logger=logger,
                                  exclude_file_list=blacklist)
    test_data = WIDERFACEDataset(args.val_data_dir, args.val_annotation)

    # BUG FIX: the model construction had been commented out, leaving
    # `faster_rcnn` undefined (NameError on use_preset below).  A single
    # foreground class matches the ('face',) label set used by the
    # evaluator.
    faster_rcnn = FasterRCNNVGG16(n_fg_class=1, pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        # Select the device first, then transfer the model to it.
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    train_data = TransformDataset(train_data, transform)
    train_iter = chainer.iterators.SerialIterator(
        train_data, batch_size=1)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    trainer.extend(
        extensions.snapshot_object(model.faster_rcnn, 'snapshot_model.npz'),
        trigger=(args.iteration, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr',
         'main/loss',
         'main/roi_loc_loss',
         'main/roi_cls_loss',
         'main/rpn_loc_loss',
         'main/rpn_cls_loss',
         'validation/main/map',
         ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss'],
                file_name='loss.png', trigger=plot_interval
            ),
            trigger=plot_interval
        )

    # NOTE: `invoke_before_training` was removed from Trainer.extend in
    # Chainer v2; passing it raised TypeError.  Dropping it keeps the
    # default behaviour.
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model.faster_rcnn, use_07_metric=True,
            label_names=('face',)),
        trigger=ManualScheduleTrigger(
            [args.step_size, args.iteration], 'iteration'))
    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main(config):
    """Run stratified group k-fold training across ChainerMN workers.

    ``config`` is a callable returning an options object; the expected
    fields are the ``opts.*`` attributes read below.  Optionally mixes
    pseudo-labeled test rows into the training table.
    """
    opts = config()
    comm = chainermn.create_communicator(opts.communicator)
    device = comm.intra_rank
    backborn_cfg = opts.backborn_cfg
    # Load and shuffle the whole training table once up front.
    df = pd.read_csv(opts.path_data + opts.train_df).sample(frac=1)

    # ------------------------- pseudo labeling -------------------------
    if opts.pseudo_labeling_path is not None:
        test_df = pd.read_csv(opts.path_data + opts.test_df)
        labels = np.load(opts.pseudo_labeling_path, allow_pickle=False)
        # Duplicate predictions so they line up with the test rows
        # (presumably one prediction per acquisition site -- TODO confirm).
        labels = np.concatenate((labels, labels))
        confident_rows = []
        confident_sirna = []
        for row, label in enumerate(labels):
            # Keep only confidently predicted test samples.
            if label.max() > 0.0013:
                confident_rows.append(row)
                confident_sirna.append(label.argmax())
        print(len(confident_rows))
        pseudo_df = test_df.iloc[confident_rows, :]
        pseudo_df["sirna"] = confident_sirna
        # BUG FIX: removed a dead ``pseudo_df = pseudo_df`` no-op here.
        df = pd.concat([df, pseudo_df]).sample(frac=1)
    # -------------------------------------------------------------------

    for i, (train_df, valid_df) in enumerate(
            stratified_groups_kfold(df, target=opts.fold_target,
                                    n_splits=opts.fold)):
        if comm.rank == 0:
            fold_columns = [
                'id_code', 'experiment', 'plate', 'well', 'sirna',
                'filename', 'cell', 'site'
            ]
            train_df.to_csv(
                opts.path_data + 'train' + '_fold' + str(i) + '.csv',
                columns=fold_columns)
            valid_df.to_csv(
                opts.path_data + 'valid' + '_fold' + str(i) + '.csv',
                columns=fold_columns)
            print("Save a csvfile of fold_" + str(i))

        dataset = opts.dataset
        train_dataset = dataset(train_df, opts.path_data)
        val_dataset = dataset(valid_df, opts.path_data)

        backborn = chcv2_get_model(
            backborn_cfg['name'],
            pretrained=backborn_cfg['pretrain'],
            in_size=opts.input_shape)[backborn_cfg['layer']]
        model = opts.model(backborn=backborn).copy(mode='init')
        if device >= 0:
            # Select the device before transferring the model.
            chainer.cuda.get_device(device).use()
            model.to_gpu()
        # NOTE: removed an unused ``mean = opts.mean`` binding and a
        # second, duplicated device-selection/to_gpu block that used to
        # follow dataset loading.

        train_data = TransformDataset(train_dataset, opts.train_transform)
        val_data = TransformDataset(val_dataset, opts.valid_trainsform)

        # Rank 0 owns the data; scatter_dataset splits it across workers.
        if comm.rank == 0:
            train_indices = train_data
            val_indices = val_data
        else:
            train_indices = None
            val_indices = None
        train_data = chainermn.scatter_dataset(
            train_indices, comm, shuffle=True)
        val_data = chainermn.scatter_dataset(
            val_indices, comm, shuffle=False)

        train_iter = chainer.iterators.MultiprocessIterator(
            train_data, opts.batchsize, shuffle=True,
            n_processes=opts.loaderjob)
        val_iter = chainer.iterators.MultiprocessIterator(
            val_data, opts.batchsize, repeat=False, shuffle=False,
            n_processes=opts.loaderjob)
        print('finished loading dataset')

        if opts.optimizer == "CorrectedMomentumSGD":
            optimizer = chainermn.create_multi_node_optimizer(
                CorrectedMomentumSGD(lr=opts.lr), comm)
        elif opts.optimizer == "NesterovAG":
            optimizer = chainermn.create_multi_node_optimizer(
                NesterovAG(lr=opts.lr), comm)
        else:
            optimizer = chainermn.create_multi_node_optimizer(
                Adam(alpha=opts.alpha, weight_decay_rate=opts.weight_decay,
                     adabound=True, final_lr=0.5), comm)
        optimizer.setup(model)
        if opts.optimizer == "CorrectedMomentumSGD":
            # Weight decay on everything except batch-norm parameters.
            for param in model.params():
                if param.name not in ('beta', 'gamma'):
                    param.update_rule.add_hook(WeightDecay(opts.weight_decay))

        # Classification-head loss.
        if opts.fc_lossfun == 'softmax_cross_entropy':
            fc_lossfun = F.softmax_cross_entropy
        elif opts.fc_lossfun == 'focal_loss':
            focal_loss = (FocalLoss(label_smoothing=True)
                          if opts.ls else FocalLoss())
            fc_lossfun = focal_loss.loss
        elif opts.fc_lossfun == 'auto_focal_loss':
            focal_loss = (AutoFocalLoss(label_smoothing=True)
                          if opts.ls else AutoFocalLoss())
            fc_lossfun = focal_loss.loss
        elif opts.fc_lossfun == 'auto_focal_loss_bce':
            if opts.ls:
                focal_loss = AutoFocalLossBCE(label_smoothing=True)
            else:
                # BUG FIX: this branch previously constructed AutoFocalLoss()
                # instead of the BCE variant the option selects.
                focal_loss = AutoFocalLossBCE()
            fc_lossfun = focal_loss.loss
        else:
            # Previously an unknown name fell through to a NameError later.
            raise ValueError(
                'unknown fc_lossfun: {}'.format(opts.fc_lossfun))

        # Metric-learning loss.
        if opts.metric_lossfun == 'arcface':
            metric_lossfun = ArcFace().loss
        elif opts.metric_lossfun == 'adacos':
            metric_lossfun = AdaCos().loss
        else:
            raise ValueError(
                'unknown metric_lossfun: {}'.format(opts.metric_lossfun))

        updater = opts.updater(
            train_iter, optimizer, model, device=device,
            max_epoch=opts.max_epoch, fix_sche=opts.fix_sche,
            metric_lossfun=metric_lossfun, fc_lossfun=fc_lossfun,
            metric_w=opts.metric_w, fc_w=opts.fc_w)
        evaluator = chainermn.create_multi_node_evaluator(
            opts.evaluator(
                val_iter, model, device=device,
                max_epoch=opts.max_epoch, fix_sche=opts.fix_sche,
                metric_lossfun=metric_lossfun, fc_lossfun=fc_lossfun,
                metric_w=opts.metric_w, fc_w=opts.fc_w),
            comm)
        trainer = training.Trainer(
            updater, (opts.max_epoch, 'epoch'),
            out=opts.out + '_fold' + str(i))

        # SGD-like optimizers anneal 'lr'; Adam anneals 'alpha'.
        # (The two SGD branches were previously duplicated verbatim.)
        shift_attr = ('lr' if opts.optimizer in
                      ("CorrectedMomentumSGD", "NesterovAG") else 'alpha')
        trainer.extend(
            extensions.ExponentialShift(shift_attr, opts.shift_lr),
            trigger=ManualScheduleTrigger(opts.lr_points, 'epoch'))

        trainer.extend(evaluator,
                       trigger=(int(opts.max_epoch / 10), 'epoch'))

        log_interval = 0.1, 'epoch'
        print_interval = 0.1, 'epoch'
        if comm.rank == 0:
            trainer.extend(chainer.training.extensions.observe_lr(),
                           trigger=log_interval)
            trainer.extend(
                extensions.snapshot_object(
                    model, 'snapshot_model' + '_{.updater.epoch}.npz'),
                trigger=(opts.max_epoch / 10, 'epoch'))
            trainer.extend(
                extensions.snapshot_object(model, 'snapshot_model_f1max.npz'),
                trigger=chainer.training.triggers.MaxValueTrigger(
                    'validation/main/accuracy',
                    trigger=(opts.max_epoch / 10, 'epoch')))
            trainer.extend(extensions.LogReport(trigger=log_interval))
            trainer.extend(extensions.PrintReport([
                'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
                'main/face_loss', 'main/ce_loss', 'main/accuracy',
                'validation/main/loss', 'validation/main/face_loss',
                'validation/main/ce_loss', 'validation/main/accuracy'
            ]), trigger=print_interval)
            trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.run()
def main():
    """Train FCIS ResNet-101 for COCO instance segmentation (ChainerMN)."""
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument(
        '--lr', '-l', type=float, default=0.0005,
        help='Default value is for 1 GPU.\n'
             'The learning rate should be multiplied by the number of gpu')
    parser.add_argument('--epoch', '-e', type=int, default=18)
    parser.add_argument('--cooldown-epoch', '-ce', type=int, default=12)
    args = parser.parse_args()

    # One process per GPU; each worker picks its local device.
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    np.random.seed(args.seed)

    # Model: allow smaller region proposals than the class default.
    proposal_creator_params = FCISResNet101.proposal_creator_params
    proposal_creator_params['min_size'] = 2
    fcis = FCISResNet101(
        n_fg_class=len(coco_instance_segmentation_label_names),
        anchor_scales=(4, 8, 16, 32),
        pretrained_model='imagenet',
        iter2=False,
        proposal_creator_params=proposal_creator_params)
    fcis.use_preset('coco_evaluate')
    proposal_target_creator = ProposalTargetCreator()
    proposal_target_creator.neg_iou_thresh_lo = 0.0
    model = FCISTrainChain(
        fcis, proposal_target_creator=proposal_target_creator)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    def keep_annotated(dataset):
        # Drop images without a single instance annotation.
        keep = np.array(
            [idx for idx, label in enumerate(dataset.slice[:, ['label']])
             if len(label[0]) > 0],
            dtype=np.int32)
        return dataset.slice[keep]

    train_dataset = keep_annotated(
        COCOInstanceSegmentationDataset(year='2014', split='train'))
    vmml_dataset = keep_annotated(
        COCOInstanceSegmentationDataset(year='2014', split='valminusminival'))
    train_dataset = TransformDataset(
        ConcatenatedDataset(train_dataset, vmml_dataset),
        ('img', 'mask', 'label', 'bbox', 'scale'),
        Transform(model.fcis))

    # Rank 0 owns the index list; scatter splits it across all workers.
    indices = np.arange(len(train_dataset)) if comm.rank == 0 else None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=1)

    # Evaluation happens on rank 0 only.
    if comm.rank == 0:
        test_dataset = COCOInstanceSegmentationDataset(
            year='2014', split='minival',
            use_crowded=True, return_crowded=True, return_area=True)
        test_dataset = test_dataset.slice[np.arange(len(test_dataset))]
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, batch_size=1, repeat=False, shuffle=False)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(momentum=0.9), comm)
    optimizer.setup(model)
    # The head conv trains with 3x gradients.
    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Freeze batch-norm parameters and the early extractor stages.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=device)
    trainer = chainer.training.Trainer(
        updater, (args.epoch, 'epoch'), out=args.out)

    @make_shift('lr')
    def lr_scheduler(trainer):
        # Warm-up (x0.1) for the first 2000 global iterations, the base
        # rate until the cooldown epoch, then a 10x cut.
        if trainer.updater.iteration * comm.size < 2000:
            rate = 0.1
        elif trainer.updater.epoch < args.cooldown_epoch:
            rate = 1
        else:
            rate = 0.1
        return rate * args.lr

    trainer.extend(lr_scheduler)

    if comm.rank == 0:
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        trainer.extend(
            extensions.snapshot_object(
                model.fcis, filename='snapshot_model.npz'),
            trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        report_items = [
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map/iou=0.50:0.95/area=all/max_dets=100',
        ]
        trainer.extend(extensions.PrintReport(report_items),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(['main/loss'], file_name='loss.png',
                                      trigger=plot_interval),
                trigger=plot_interval)
        trainer.extend(
            InstanceSegmentationCOCOEvaluator(
                test_iter, model.fcis,
                label_names=coco_instance_segmentation_label_names),
            trigger=ManualScheduleTrigger(
                [len(train_dataset) * args.cooldown_epoch,
                 len(train_dataset) * args.epoch], 'iteration'))
        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Drive a fully config-file-defined training run.

    Every component (model, dataset, optimizer, hooks, schedule) is
    resolved dynamically from the configuration via ``import_module`` /
    ``getattr``.  The run outcome is recorded with ``log_config``.
    """
    config = get_config()
    # print("configured as follows:")
    # print(yaml_dump(config))

    # Ask the operator to confirm before anything is written.
    while True:
        answer = input("ok? (y/n):")
        if answer in ('y', 'Y'):
            log_config(config, "training start")
            break
        elif answer in ('n', 'N'):
            destroy_config(config)
            exit(1)

    try:
        info = config["additional information"]

        # Load the mask array through the configured loader.
        # (Removed a pointless ``except FileNotFoundError as e: raise e``
        # wrapper that only re-raised the same exception.)
        print("mask loading...")
        mask_loader_cfg = info["mask"]["loader"]
        load_mask_module = import_module(mask_loader_cfg["module"],
                                         mask_loader_cfg["package"])
        load_mask = getattr(load_mask_module, mask_loader_cfg["function"])
        mask = load_mask(**mask_loader_cfg["params"])
        print("done.")
        print("mask.shape: {}".format(mask.shape))

        model_module = import_module(config["model"]["module"],
                                     config["model"]["package"])
        Model = getattr(model_module, config["model"]["class"])
        model = Model(mask=mask, **config["model"]["params"])

        # Optionally warm-start from a previous run's weights.
        finetune_config = info.get("finetune")
        if finetune_config is not None:
            load_npz(path.join(finetune_config["directory"],
                               finetune_config["file"]),
                     model, strict=False)

        # Use GPU 0 when available; fall back to CPU otherwise.
        try:
            chainer.cuda.get_device_from_id(0).use()
            gpu = 0
            print("transferring model to GPU...")
            model.to_gpu(gpu)
            print("GPU enabled")
        except RuntimeError:
            gpu = -1
            print("GPU disabled")

        dataset_module = import_module(config["dataset"]["module"],
                                       config["dataset"]["package"])
        Dataset = getattr(dataset_module, config["dataset"]["class"])
        train_dataset = Dataset(**config["dataset"]["train"]["params"])
        valid_dataset = Dataset(**config["dataset"]["valid"]["params"])
        train_iterator = Iterator(train_dataset, config["batch"]["train"],
                                  True, True)
        valid_iterator = Iterator(valid_dataset, config["batch"]["valid"],
                                  False, False)

        Optimizer = getattr(chainer.optimizers, config["optimizer"]["class"])
        optimizer = Optimizer(**config["optimizer"]["params"])
        optimizer.setup(model)
        for hook_config in config["optimizer"]["hook"]:
            hook_module = import_module(hook_config["module"],
                                        hook_config["package"])
            Hook = getattr(hook_module, hook_config["class"])
            optimizer.add_hook(Hook(**hook_config["params"]))

        updater = Updater(train_iterator, optimizer, device=gpu)
        trainer = Trainer(updater, **config["trainer"]["params"])
        trainer.extend(snapshot(),
                       trigger=config["trainer"]["snapshot_interval"])
        trainer.extend(
            snapshot_object(model, "model_iter_{.updater.iteration}"),
            trigger=config["trainer"]["model_interval"])
        trainer.extend(observe_lr(),
                       trigger=config["trainer"]["log_interval"])
        trainer.extend(
            LogReport(
                ["epoch", "iteration", "main/loss", "validation/main/loss"],
                trigger=config["trainer"]["log_interval"]))
        trainer.extend(Evaluator(valid_iterator, model, device=gpu),
                       trigger=config["trainer"]["eval_interval"])
        trainer.extend(
            PrintReport(
                ["epoch", "iteration", "main/loss", "validation/main/loss"]),
            trigger=config["trainer"]["log_interval"])
        trainer.extend(ProgressBar(update_interval=1))

        if "schedule" in info:
            for i, (interval, funcs) in enumerate(info["schedule"].items()):
                # Bind ``funcs`` as a default argument so every scheduled
                # callback keeps its own function list (avoids the
                # late-binding closure pitfall).
                f = lambda trainer, funcs=funcs: [
                    getattr(trainer.updater.get_optimizer('main').target,
                            func["function"])(*func["params"])
                    for func in funcs
                ]
                trainer.extend(f, name="schedule_{}".format(i),
                               trigger=ManualScheduleTrigger(*interval))

        trainer.run()
        log_config(config, "succeeded")
    except Exception:
        # Record the failure, then propagate with the original traceback
        # (bare ``raise`` instead of ``raise e``).
        log_config(config, "unintentional termination")
        raise
def main():
    """Train FCIS ResNet-101 on SBD instance segmentation (ChainerMN)."""
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--lr', '-l', type=float, default=None,
                        help='Learning rate for multi GPUs')
    parser.add_argument('--batchsize', type=int, default=8)
    parser.add_argument('--epoch', '-e', type=int, default=42)
    parser.add_argument('--cooldown-epoch', '-ce', type=int, default=28)
    args = parser.parse_args()

    # Workaround for MultiprocessIterator together with NCCL, see:
    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    # One process per GPU; each worker uses its local device.
    comm = chainermn.create_communicator('pure_nccl')
    device = comm.intra_rank

    np.random.seed(args.seed)

    fcis = FCISResNet101(
        n_fg_class=len(sbd_instance_segmentation_label_names),
        pretrained_model='imagenet', iter2=False)
    fcis.use_preset('evaluate')
    model = FCISTrainChain(fcis)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # Rank 0 owns the index list; scatter splits it across all workers.
    train_dataset = TransformDataset(
        SBDInstanceSegmentationDataset(split='train'),
        ('img', 'mask', 'label', 'bbox', 'scale'),
        Transform(model.fcis))
    indices = np.arange(len(train_dataset)) if comm.rank == 0 else None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(
        train_dataset, batch_size=args.batchsize // comm.size)

    # Evaluation happens on rank 0 only.
    if comm.rank == 0:
        test_dataset = SBDInstanceSegmentationDataset(split='val')
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, batch_size=1, repeat=False, shuffle=False)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9), comm)
    optimizer.setup(model)
    # The head conv trains with 3x gradients.
    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Freeze batch-norm parameters and the early extractor stages.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=device)
    trainer = chainer.training.Trainer(
        updater, (args.epoch, 'epoch'), out=args.out)

    @make_shift('lr')
    def lr_scheduler(trainer):
        # Base rate scales with batch size unless given explicitly;
        # cut 10x after the cooldown epoch.
        base_lr = 0.0005 * args.batchsize if args.lr is None else args.lr
        rate = 1 if trainer.updater.epoch < args.cooldown_epoch else 0.1
        return rate * base_lr

    trainer.extend(lr_scheduler)

    if comm.rank == 0:
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        trainer.extend(
            extensions.snapshot_object(
                model.fcis, filename='snapshot_model.npz'),
            trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map',
        ]), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(['main/loss'], file_name='loss.png',
                                      trigger=plot_interval),
                trigger=plot_interval)
        trainer.extend(
            InstanceSegmentationVOCEvaluator(
                test_iter, model.fcis,
                iou_thresh=0.5, use_07_metric=True,
                label_names=sbd_instance_segmentation_label_names),
            trigger=ManualScheduleTrigger(
                [len(train_dataset) * args.cooldown_epoch,
                 len(train_dataset) * args.epoch], 'iteration'))
        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train Light-Head R-CNN ResNet-101 on COCO (ChainerMN)."""
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: LightHeadRCNN')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=1234)
    parser.add_argument('--batch-size', '-b', type=int, default=2)
    args = parser.parse_args()

    # One process per GPU; each worker uses its local device.
    comm = chainermn.create_communicator('pure_nccl')
    device = comm.intra_rank

    np.random.seed(args.seed)
    random.seed(args.seed)

    light_head_rcnn = LightHeadRCNNResNet101(
        pretrained_model='imagenet',
        n_fg_class=len(coco_bbox_label_names))
    light_head_rcnn.use_preset('evaluate')
    model = LightHeadRCNNTrainChain(light_head_rcnn)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train_dataset = COCOBboxDataset(year='2014', split='train')
    vmml_dataset = COCOBboxDataset(year='2014', split='valminusminival')

    # Drop images without a single bounding-box annotation.
    train_indices = np.array(
        [i for i, label in enumerate(train_dataset.slice[:, ['label']])
         if len(label[0]) > 0],
        dtype=np.int32)
    train_dataset = train_dataset.slice[train_indices]
    vmml_indices = np.array(
        [i for i, label in enumerate(vmml_dataset.slice[:, ['label']])
         if len(label[0]) > 0],
        dtype=np.int32)
    vmml_dataset = vmml_dataset.slice[vmml_indices]

    train_dataset = TransformDataset(
        ConcatenatedDataset(train_dataset, vmml_dataset),
        ('img', 'bbox', 'label', 'scale'),
        Transform(model.light_head_rcnn))

    # Rank 0 owns the index list; scatter splits it across all workers.
    indices = np.arange(len(train_dataset)) if comm.rank == 0 else None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(
        train_dataset, batch_size=args.batch_size)

    # Evaluation happens on rank 0 only.
    if comm.rank == 0:
        test_dataset = COCOBboxDataset(
            year='2014', split='minival',
            use_crowded=True, return_crowded=True, return_area=True)
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, batch_size=1, repeat=False, shuffle=False)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(momentum=0.9), comm)
    optimizer.setup(model)

    # Every link of the global-context branch trains with 3x gradients.
    gcm = model.light_head_rcnn.head.global_context_module
    for link in (gcm.col_max, gcm.col, gcm.row_max, gcm.row):
        link.W.update_rule.add_hook(GradientScaling(3.0))
        link.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0001))

    # Freeze batch-norm parameters and the early extractor stages.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.light_head_rcnn.extractor.conv1.disable_update()
    model.light_head_rcnn.extractor.res2.disable_update()

    converter = functools.partial(
        concat_examples, padding=0,
        # img, bboxes, labels, scales
        indices_concat=[0, 2, 3],  # img, _, labels, scales
        indices_to_device=[0],  # img
    )
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    trainer = chainer.training.Trainer(updater, (30, 'epoch'), out=args.out)

    @make_shift('lr')
    def lr_scheduler(trainer):
        # Linear warm-up over the first 500 iterations, then step decay
        # at epochs 20 and 26.
        base_lr = 0.0005 * 1.25 * args.batch_size * comm.size
        warm_up_duration = 500
        warm_up_rate = 1 / 3
        iteration = trainer.updater.iteration
        epoch = trainer.updater.epoch
        if iteration < warm_up_duration:
            rate = warm_up_rate \
                + (1 - warm_up_rate) * iteration / warm_up_duration
        elif epoch < 20:
            rate = 1
        elif epoch < 26:
            rate = 0.1
        else:
            rate = 0.01
        return rate * base_lr

    trainer.extend(lr_scheduler)

    if comm.rank == 0:
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        model_name = model.light_head_rcnn.__class__.__name__
        trainer.extend(
            chainer.training.extensions.snapshot_object(
                model.light_head_rcnn,
                savefun=chainer.serializers.save_npz,
                filename='%s_model_iter_{.updater.iteration}.npz'
                         % model_name),
            trigger=(1, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        report_items = [
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'validation/main/map/iou=0.50:0.95/area=all/max_dets=100',
        ]
        trainer.extend(extensions.PrintReport(report_items),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(['main/loss'], file_name='loss.png',
                                      trigger=plot_interval),
                trigger=plot_interval)
        trainer.extend(
            DetectionCOCOEvaluator(
                test_iter, model.light_head_rcnn,
                label_names=coco_bbox_label_names),
            trigger=ManualScheduleTrigger([20, 26], 'epoch'))
        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train Faster R-CNN VGG16 on PASCAL VOC 2007 (single GPU)."""
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    train_data = VOCDetectionDataset(split='trainval', year='2007')
    test_data = VOCDetectionDataset(split='test', year='2007',
                                    use_difficult=True,
                                    return_difficult=True)
    faster_rcnn = FasterRCNNVGG16(
        n_fg_class=len(voc_detection_label_names),
        pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        # BUG FIX: select the CUDA device *before* transferring the model;
        # the original called model.to_gpu(args.gpu) first and used the
        # deprecated chainer.cuda.get_device().
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    def transform(in_data):
        """Rescale for the network, then apply a random horizontal flip."""
        img, bbox, label = in_data
        _, H, W = img.shape
        img = faster_rcnn.prepare(img)
        _, o_H, o_W = img.shape
        scale = o_H / H
        bbox = transforms.resize_bbox(bbox, (H, W), (o_H, o_W))
        # horizontally flip
        img, params = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(bbox, (o_H, o_W),
                                    x_flip=params['x_flip'])
        return img, bbox, label, scale

    train_data = TransformDataset(train_data, transform)
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)
    trainer.extend(
        extensions.snapshot_object(model.faster_rcnn, 'snapshot_model.npz'),
        trigger=(args.iteration, 'iteration'))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr',
         'main/loss', 'main/roi_loc_loss', 'main/roi_cls_loss',
         'main/rpn_loc_loss', 'main/rpn_cls_loss',
         'validation/main/map']),
        trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss'], file_name='loss.png',
                                  trigger=plot_interval),
            trigger=plot_interval)
    # NOTE: dropped the deprecated ``invoke_before_training=False`` keyword;
    # Trainer.extend() in Chainer >= 2 no longer accepts it and the default
    # behavior is equivalent.
    trainer.extend(
        DetectionVOCEvaluator(test_iter, model.faster_rcnn,
                              use_07_metric=True,
                              label_names=voc_detection_label_names),
        trigger=ManualScheduleTrigger([args.step_size, args.iteration],
                                      'iteration'))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.run()
def __init__(self, out_name='mcg.dot'):
    """Prepare the extension: an output file name, a line-profile hook,
    and a trigger that fires on the first iteration only."""
    self.trigger = ManualScheduleTrigger(1, 'iteration')
    self._hook = LineProfileHook()
    self._out_name = out_name  # destination for the dumped graph/profile
def main():
    """Train Faster R-CNN VGG16 on PASCAL VOC (07 or 07+12), single GPU."""
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--dataset', choices=('voc07', 'voc0712'),
                        help='The dataset to use: VOC07, VOC07+12',
                        default='voc07')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    # Training split: VOC07 alone, or VOC07+VOC12 concatenated.
    if args.dataset == 'voc07':
        train_data = VOCBboxDataset(split='trainval', year='2007')
    elif args.dataset == 'voc0712':
        train_data = ConcatenatedDataset(
            VOCBboxDataset(year='2007', split='trainval'),
            VOCBboxDataset(year='2012', split='trainval'))
    # Evaluation always uses the VOC07 test split, difficult objects kept.
    test_data = VOCBboxDataset(split='test', year='2007',
                               use_difficult=True, return_difficult=True)

    faster_rcnn = FasterRCNNVGG16(n_fg_class=len(voc_bbox_label_names),
                                  pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(rate=0.0005))

    train_data = TransformDataset(train_data, Transform(faster_rcnn))
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)

    updater = chainer.training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    trainer.extend(
        extensions.snapshot_object(model.faster_rcnn, 'snapshot_model.npz'),
        trigger=(args.iteration, 'iteration'))
    # Learning rate drops 10x once at step_size.
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport(
        ['iteration', 'epoch', 'elapsed_time', 'lr',
         'main/loss', 'main/roi_loc_loss', 'main/roi_cls_loss',
         'main/rpn_loc_loss', 'main/rpn_cls_loss',
         'validation/main/map']),
        trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss'], file_name='loss.png',
                                  trigger=plot_interval),
            trigger=plot_interval)
    # Evaluate right before each LR drop and at the end of training.
    trainer.extend(
        DetectionVOCEvaluator(test_iter, model.faster_rcnn,
                              use_07_metric=True,
                              label_names=voc_bbox_label_names),
        trigger=ManualScheduleTrigger([args.step_size, args.iteration],
                                      'iteration'))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.run()
def main():
    """Train the segmentation model (multi-GPU via ChainerMN).

    Loads a YAML config, builds the RSNA train/val split on rank 0,
    scatters it across workers, trains, snapshots the model, and
    optionally runs prediction on the validation/test splits afterwards.
    """
    parser = argparse.ArgumentParser(description='Segmentation model')
    parser.add_argument('--config', '-c',
                        default='examples/configs/seg_resnet.yaml')
    parser.add_argument('--out', '-o', default='results',
                        help='Output directory')
    parser.add_argument('--gpu', '-g', type=int, default=0)
    parser.add_argument('--snapshot', type=int, help='Snapshot interval',
                        default=1)
    parser.add_argument('--val-set', type=int)
    parser.add_argument('--predict', action='store_true')
    parser.add_argument('--benchmark', action='store_true',
                        help='To run benchmark mode')
    parser.add_argument(
        '--benchmark-iterations', type=int, default=500,
        help='the number of iterations when using benchmark mode')
    # NOTE(review): --cprofile is parsed but never used in this function —
    # presumably consumed elsewhere; confirm before removing.
    parser.add_argument('--cprofile', action='store_true',
                        help='To profile with cprofile')
    args = parser.parse_args()

    # FIX: the original `yaml.load(open(args.config))` leaked the file handle
    # and called yaml.load without a Loader (deprecated in PyYAML 5.x, a
    # TypeError on PyYAML >= 6.0).  safe_load also avoids arbitrary object
    # construction from the config file.
    with open(args.config) as config_file:
        config = load_config(yaml.safe_load(config_file),
                             dump_yaml_dir=args.out)

    comm = chainermn.create_communicator(communicator_name='pure_nccl')
    # One GPU per process: offset the base GPU id by this process's local rank.
    device = comm.intra_rank + args.gpu
    cuda.get_device_from_id(device).use()

    if comm.size != config['n_gpu']:
        raise ValueError('# of GPUs specified in config file does not match '
                         'the actual number of available GPUs. '
                         'Expected={} Actual={}'.format(
                             config['n_gpu'], comm.size))

    if args.val_set is not None:
        # FIX: explicit validation instead of `assert`, which is stripped
        # under `python -O`.
        if not 0 <= args.val_set <= 9:
            raise ValueError(
                '--val-set must be in [0, 9], got {}'.format(args.val_set))
        config['val_set'] = args.val_set

    trainer_stop_trigger = config['epoch'], 'epoch'
    if args.benchmark:
        # Benchmark mode runs a fixed number of iterations instead of epochs.
        trainer_stop_trigger = args.benchmark_iterations, 'iteration'

    # Setup model
    model = setup_model(config, 0)
    if config.get('resume'):
        chainer.serializers.load_npz(config['resume'], model)
    train_chain = TrainChain(model, config['downscale'])
    train_chain.to_gpu()

    # Setup dataset — built on rank 0 only, then scattered to all workers.
    if comm.rank == 0:
        dataset = RSNATrainDataset()

        # Determine samples to pick up according to the configured
        # radiograph view position.
        # FIX: explicit validation instead of `assert` (stripped under -O).
        if config['view_position'] not in ('both', 'pa', 'ap', 'no-pa-pos'):
            raise ValueError('unknown view_position: {}'.format(
                config['view_position']))
        if config['view_position'] == 'both':
            mask = np.ones(len(dataset), dtype=bool)
        elif config['view_position'] == 'no-pa-pos':
            # Exclude PA-view positive samples, keep everything else.
            mask = dataset.patient_df['ViewPosition'].values == 'PA'
            mask &= dataset.get_mask_for_positive_samples()
            mask = ~mask
        else:
            mask = dataset.patient_df['ViewPosition'].values == 'PA'
            if config['view_position'] == 'ap':
                mask = ~mask

        if config['val_set'] == -1:
            # val_set == -1: split by the dataset's own test-range flag.
            train_mask = mask & (
                dataset.patient_df['withinTestRange'].values == 0)
            train_indices = train_mask.nonzero()[0]
            val_mask = mask & (
                dataset.patient_df['withinTestRange'].values == 1)
            val_indices = val_mask.nonzero()[0]
        else:
            # Otherwise use the configured cross-validation fold.
            train_indices, val_indices = create_train_val_indices(
                mask, config['val_set'])

        train_data = dataset.slice[train_indices]
        val_data = dataset.slice[val_indices]
        print('train = {}, val = {}'.format(len(train_data), len(val_data)))

        positive_mask = dataset.get_mask_for_positive_samples()[train_indices]
        if config['oversampling_rate'] > 1:
            train_data = oversample_dataset(train_data, positive_mask,
                                            config['oversampling_rate'])
            print('==> train = {} ({}x oversampled with {} positive samples)'.
                  format(len(train_data), config['oversampling_rate'],
                         positive_mask.sum()))
        else:
            print('--> no oversampling with {} positive samples'.format(
                positive_mask.sum()))

        train_data = TransformDataset(train_data, preprocess)
        val_data = TransformDataset(val_data, preprocess)

        # Data augmentation (training set only).
        augment = Augment(config['downscale'], config['data_augmentation'])
        train_data = TransformDataset(train_data, augment)
    else:
        # Non-root ranks receive their shard from scatter_dataset below.
        train_data, val_data = None, None
    train_data = chainermn.scatter_dataset(train_data, comm)
    val_data = chainermn.scatter_dataset(val_data, comm)

    # Setup iterator, optimizer and updater
    train_iter = MultiprocessIterator(train_data,
                                      batch_size=config['batch_size'],
                                      shared_mem=10000000)
    val_iter = MultiprocessIterator(val_data,
                                    batch_size=config['batch_size'],
                                    repeat=False, shuffle=False,
                                    shared_mem=10000000)
    optimizer = setup_optimizer(config, comm, train_chain)

    # Freeze the extractor at the start of a fresh (non-resumed) run;
    # 0 means "never freeze", and it is re-enabled by the extension below.
    if not config.get('resume') and config['extractor_freeze_iteration'] != 0:
        model.unet.extractor.disable_update()

    def converter(batch, dev):
        # concat_examples with zero padding; shared by updater and evaluator
        # (the original duplicated this as two identical lambdas).
        return chainer.dataset.concat_examples(batch, dev, 0)

    updater = chainer.training.updaters.StandardUpdater(
        train_iter, optimizer, device=device, converter=converter)

    # Setup trainer
    trainer = chainer.training.Trainer(updater,
                                       stop_trigger=trainer_stop_trigger,
                                       out=args.out)
    trainer.extend(setup_lr_scheduler(config), trigger=(1, 'iteration'))

    # Reporting/snapshot extensions only on rank 0 to avoid duplicate output.
    if comm.rank == 0:
        log_interval = 10, 'iteration'
        print_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=print_interval)
        entries = ['iteration', 'epoch', 'elapsed_time', 'lr']
        measurements = [
            'loss', 'seg_f1', 'seg_loss', 'edge_loss', 'raw_edge_loss'
        ]
        entries.extend(['main/{}'.format(x) for x in measurements])
        entries.extend(
            ['validation/main/{}'.format(x) for x in measurements])
        trainer.extend(extensions.PrintReport(entries),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        if args.snapshot > 0:
            trainer.extend(extensions.snapshot_object(
                model, 'model_epoch_{.updater.epoch}.npz'),
                trigger=(args.snapshot, 'epoch'))
        trainer.extend(extensions.snapshot_object(model, 'final_model.npz'),
                       trigger=trainer_stop_trigger)

    evaluator = extensions.Evaluator(
        val_iter, train_chain, device=device, converter=converter)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    @chainer.training.make_extension(trigger=(1, 'epoch'), priority=-100)
    def enable_extractor_update(_):
        print('enable update!')
        model.unet.extractor.enable_update()

    if config['extractor_freeze_iteration'] > 0:  # no melt if -1
        melt_trigger = ManualScheduleTrigger(
            config['extractor_freeze_iteration'], 'iteration')
        trainer.extend(enable_extractor_update, trigger=melt_trigger)

    trainer.run()

    if args.predict:
        if comm.rank == 0:
            # Run prediction on the validation split, then on the test split
            # (same arguments, different output path plus --test).
            commands = [
                '--out', '{}/t0.01.csv'.format(args.out),
                '--model', '{}/final_model.npz'.format(args.out),
                '--config', args.config,
                '--val-set', str(config['val_set']),
                '--gpu', str(args.gpu),
                '--thresh', '0.01',
            ]
            predict.main(commands)
            commands[1] = '{}/test-t0.01.csv'.format(args.out)
            commands.append('--test')
            predict.main(commands)