def test_basic(self):
    """Concatenating a dataset with itself doubles length and keeps keys."""
    concatenated = ConcatenatedDataset(self.dataset, self.dataset)
    self.assertIsInstance(concatenated, SliceableDataset)
    self.assertEqual(len(concatenated), 2 * len(self.dataset))
    self.assertEqual(concatenated.keys, ('item0', 'item1', 'item2'))
    # Index 11 maps to index 1 of the second copy, so both fetches agree.
    expected = ('item0(1)', 'item1(1)', 'item2(1)')
    for index in (1, 11):
        self.assertEqual(concatenated[index], expected)
def oversample_dataset(dataset, mask, rate):
    """Perform oversampling.

    Args:
        dataset (SliceableDataset): Source dataset.
        mask (~numpy.ndarray): `mask[i]` must be `True` iff `i`-th sample
            of `dataset` is a target of oversampling.
        rate (int): Target samples will appear `rate` times in the
            dataset after oversampling.

    Returns:
        ConcatenatedDataset: Dataset after oversampling.
            Its elements will not be shuffled.
    """
    targets = dataset.slice[mask.nonzero()[0]]
    # Original dataset first, then the masked subset repeated rate - 1
    # times, so every target appears `rate` times in total.
    repeats = [targets] * (rate - 1)
    return ConcatenatedDataset(dataset, *repeats)
def __init__(self, conf, mode='train'):
    """Build a COCO bounding-box dataset wrapper.

    Args:
        conf: Configuration object providing ``coco_path``, ``maps``,
            ``min_sizes``, ``max_size`` and ``mean``.
        mode (str): ``'train'`` loads the combined train +
            valminusminival splits; ``'val'`` loads minival with
            crowded annotations.
    """
    # Fixed typo in the assertion message ('shoulde' -> 'should') and
    # replaced the or-chain with a membership test.
    assert mode in ('train', 'val'), 'mode should be train or val'
    if mode == 'train':
        self.training = True
        # train + valminusminival: the customary enlarged training split.
        self.orig_dataset = ConcatenatedDataset(
            COCOBboxDataset(data_dir=conf.coco_path, split='train'),
            COCOBboxDataset(data_dir=conf.coco_path,
                            split='valminusminival'))
        print('train dataset imported')
    else:
        self.training = False
        self.orig_dataset = COCOBboxDataset(
            data_dir=conf.coco_path, split='minival',
            use_crowded=True, return_crowded=True, return_area=True)
        print('minival dataset imported')
    # Record type for one preprocessed example.
    self.pair = namedtuple('pair', ['img', 'bboxes', 'labels', 'scale'])
    self.maps = conf.maps
    self.min_sizes = conf.min_sizes
    self.max_size = conf.max_size
    self.mean = conf.mean
def main():
    """Multi-GPU (ChainerMN) SSD training on VOC 2007+2012 trainval."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # One process per GPU; intra_rank picks the GPU on this node.
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # Encode ground truth into multibox targets during loading.
    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder, model.insize, model.mean))

    # Rank 0 builds the index list; scatter_dataset shuffles and
    # partitions it so each worker trains on a disjoint shard.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # http://chainermn.readthedocs.io/en/latest/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize // comm.size, n_processes=2)

    # Evaluation happens only on rank 0.
    if comm.rank == 0:
        test = VOCBboxDataset(year='2007', split='test',
                              use_difficult=True, return_difficult=True)
        test_iter = chainer.iterators.SerialIterator(
            test, args.test_batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    # Biases: doubled gradient, no weight decay; everything else decays.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    # Decay lr x0.1 at each step milestone.
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step, 'iteration'))

    if comm.rank == 0:
        trainer.extend(DetectionVOCEvaluator(
            test_iter, model, use_07_metric=True,
            label_names=voc_bbox_label_names),
            trigger=triggers.ManualScheduleTrigger(
                args.step + [args.iteration], 'iteration'))

        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr',
            'main/loss', 'main/loss/loc', 'main/loss/conf',
            'validation/main/map'
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(),
                       trigger=triggers.ManualScheduleTrigger(
                           args.step + [args.iteration], 'iteration'))
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
            trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Multi-GPU (ChainerMN) Faster R-CNN FPN training on COCO."""
    parser = argparse.ArgumentParser()
    # NOTE(review): no default is given, so omitting --model leaves
    # args.model as None and `model` unbound below — confirm callers
    # always pass --model.
    parser.add_argument(
        '--model', choices=('resnet50', 'resnet101'))
    parser.add_argument('--batchsize', type=int, default=16)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    device = comm.intra_rank

    if args.model == 'resnet50':
        model = FasterRCNNFPNResNet50(
            n_fg_class=len(coco_bbox_label_names), mean='chainercv')
        # Initialize the backbone from ImageNet-pretrained weights.
        copyparams(model.extractor.base,
                   ResNet50(pretrained_model='imagenet', arch='he'))
    elif args.model == 'resnet101':
        model = FasterRCNNFPNResNet101(
            n_fg_class=len(coco_bbox_label_names), mean='chainercv')
        copyparams(model.extractor.base,
                   ResNet101(pretrained_model='imagenet', arch='he'))

    model.use_preset('evaluate')
    train_chain = TrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    train_chain.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(
            COCOBboxDataset(split='train'),
            COCOBboxDataset(split='valminusminival'),
        ), ('img', 'bbox', 'label'), transform)

    # Rank 0 builds indices; scatter_dataset shards them across workers.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]
    train_iter = chainer.iterators.MultithreadIterator(
        train, args.batchsize // comm.size)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    optimizer.add_hook(WeightDecay(0.0001))

    # Freeze the stem and first residual stage plus all BN statistics.
    model.extractor.base.conv1.disable_update()
    model.extractor.base.res2.disable_update()
    for link in model.links():
        if isinstance(link, L.BatchNormalization):
            link.disable_update()

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    # Schedule scales with batchsize (1x schedule is 90k iters at bs=16).
    # NOTE(review): `/` yields a float iteration count — presumably
    # intended as integer division; confirm the trigger accepts floats.
    trainer = training.Trainer(
        updater, (90000 * 16 / args.batchsize, 'iteration'), args.out)

    def lr_schedule(updater):
        # Linear warm-up for the first 500 iterations, then step decay.
        base_lr = 0.02 * args.batchsize / 16
        warm_up_duration = 500
        warm_up_rate = 1 / 3
        iteration = updater.iteration
        if iteration < warm_up_duration:
            rate = warm_up_rate \
                + (1 - warm_up_rate) * iteration / warm_up_duration
        elif iteration < 60000 * 16 / args.batchsize:
            rate = 1
        elif iteration < 80000 * 16 / args.batchsize:
            rate = 0.1
        else:
            rate = 0.01
        return base_lr * rate

    trainer.extend(ManualScheduler('lr', lr_schedule))

    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(
            ['epoch', 'iteration', 'lr', 'main/loss',
             'main/loss/rpn/loc', 'main/loss/rpn/conf',
             'main/loss/head/loc', 'main/loss/head/conf']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        trainer.extend(
            extensions.snapshot_object(
                model, 'model_iter_{.updater.iteration}'),
            trigger=(90000 * 16 / args.batchsize, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer, strict=False)

    trainer.run()
def main():
    """Multi-GPU (ChainerMN) FCIS PSROIAlign training on COCO."""
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument(
        '--lr', '-l', type=float, default=0.0005,
        help='Default value is for 1 GPU.\n'
             'The learning rate will be multiplied by the number of gpu')
    parser.add_argument('--no-ohem', action='store_true')
    args = parser.parse_args()

    # chainermn
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    np.random.seed(args.seed)

    # model
    proposal_creator_params = {
        'nms_thresh': 0.7,
        'n_train_pre_nms': 12000,
        'n_train_post_nms': 2000,
        'n_test_pre_nms': 6000,
        'n_test_post_nms': 1000,
        'force_cpu_nms': False,
        'min_size': 0
    }

    fcis = FCISPSROIAlignResNet101(
        n_fg_class=len(coco_instance_segmentation_label_names),
        min_size=800, max_size=1333,
        anchor_scales=(2, 4, 8, 16, 32),
        pretrained_model='imagenet', iter2=False,
        proposal_creator_params=proposal_creator_params)
    fcis.use_preset('coco_evaluate')
    if args.no_ohem:
        # Disable online hard example mining; sample 128 RoIs instead.
        model = FCISTrainChain(
            fcis, n_ohem_sample=None,
            proposal_target_creator=ProposalTargetCreator(n_sample=128))
    else:
        model = FCISTrainChain(fcis)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # dataset
    train_dataset = TransformDataset(
        ConcatenatedDataset(
            COCOInstanceSegmentationDataset(split='train'),
            COCOInstanceSegmentationDataset(split='valminusminival')),
        ('img', 'mask', 'label', 'bbox', 'scale'),
        Transform(model.fcis))
    test_dataset = COCOInstanceSegmentationDataset(
        split='minival', use_crowded=True,
        return_crowded=True, return_area=True)

    # Rank 0 builds indices; scatter_dataset shards them across workers.
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(
        train_dataset, batch_size=1)

    if comm.rank == 0:
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, batch_size=1, repeat=False, shuffle=False)

    # optimizer
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=args.lr * comm.size,
                                       momentum=0.9),
        comm)
    optimizer.setup(model)
    # Triple the gradient on the head's first conv, per FCIS training.
    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Freeze BN affine parameters and the backbone's first stages.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    converter = functools.partial(
        concat_examples, padding=0,
        # img, masks, labels, bboxes, scales
        indices_concat=[0, 1, 2, 4],  # img, masks, labels, _, scales
        indices_to_device=[0],  # img
    )

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    trainer = chainer.training.Trainer(
        updater, (18, 'epoch'), out=args.out)

    # lr scheduler: decay x0.1 at epochs 12 and 15.
    trainer.extend(chainer.training.extensions.ExponentialShift(
        'lr', 0.1, init=args.lr * comm.size),
        trigger=ManualScheduleTrigger([12, 15], 'epoch'))

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 10, 'iteration'

        # training extensions
        model_name = model.fcis.__class__.__name__

        trainer.extend(chainer.training.extensions.snapshot_object(
            model.fcis, savefun=chainer.serializers.save_npz,
            filename='%s_model_iter_{.updater.iteration}.npz' % model_name),
            trigger=(1, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        report_items = [
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map/iou=0.50:0.95/area=all/max_dets=100',
        ]
        trainer.extend(extensions.PrintReport(report_items),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(['main/loss'], file_name='loss.png',
                                      trigger=plot_interval),
                trigger=plot_interval)

        # Evaluate at the lr-decay epochs (expressed in iterations).
        trainer.extend(InstanceSegmentationCOCOEvaluator(
            test_iter, model.fcis,
            label_names=coco_instance_segmentation_label_names),
            trigger=ManualScheduleTrigger(
                [len(train_dataset) * 12, len(train_dataset) * 15],
                'iteration'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Multi-GPU (ChainerMN) FCIS ResNet101 training on COCO 2014."""
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument(
        '--lr', '-l', type=float, default=0.0005,
        help='Default value is for 1 GPU.\n'
             'The learning rate should be multiplied by the number of gpu')
    parser.add_argument('--epoch', '-e', type=int, default=18)
    parser.add_argument('--cooldown-epoch', '-ce', type=int, default=12)
    args = parser.parse_args()

    # chainermn
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    np.random.seed(args.seed)

    # model
    proposal_creator_params = FCISResNet101.proposal_creator_params
    proposal_creator_params['min_size'] = 2

    fcis = FCISResNet101(
        n_fg_class=len(coco_instance_segmentation_label_names),
        anchor_scales=(4, 8, 16, 32),
        pretrained_model='imagenet', iter2=False,
        proposal_creator_params=proposal_creator_params)
    fcis.use_preset('coco_evaluate')
    proposal_target_creator = ProposalTargetCreator()
    proposal_target_creator.neg_iou_thresh_lo = 0.0
    model = FCISTrainChain(
        fcis, proposal_target_creator=proposal_target_creator)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # train dataset
    train_dataset = COCOInstanceSegmentationDataset(
        year='2014', split='train')
    vmml_dataset = COCOInstanceSegmentationDataset(
        year='2014', split='valminusminival')

    # filter non-annotated data
    train_indices = np.array(
        [i for i, label in enumerate(train_dataset.slice[:, ['label']])
         if len(label[0]) > 0],
        dtype=np.int32)
    train_dataset = train_dataset.slice[train_indices]
    vmml_indices = np.array(
        [i for i, label in enumerate(vmml_dataset.slice[:, ['label']])
         if len(label[0]) > 0],
        dtype=np.int32)
    vmml_dataset = vmml_dataset.slice[vmml_indices]

    train_dataset = TransformDataset(
        ConcatenatedDataset(train_dataset, vmml_dataset),
        ('img', 'mask', 'label', 'bbox', 'scale'),
        Transform(model.fcis))

    # Rank 0 builds indices; scatter_dataset shards them across workers.
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(
        train_dataset, batch_size=1)

    # test dataset (evaluation happens only on rank 0)
    if comm.rank == 0:
        test_dataset = COCOInstanceSegmentationDataset(
            year='2014', split='minival',
            use_crowded=True, return_crowded=True, return_area=True)
        indices = np.arange(len(test_dataset))
        test_dataset = test_dataset.slice[indices]
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, batch_size=1, repeat=False, shuffle=False)

    # optimizer
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(momentum=0.9), comm)
    optimizer.setup(model)

    # Triple the gradient on the head's first conv, per FCIS training.
    model.fcis.head.conv1.W.update_rule.add_hook(GradientScaling(3.0))
    model.fcis.head.conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Freeze BN affine parameters and the backbone's first stages.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.fcis.extractor.conv1.disable_update()
    model.fcis.extractor.res2.disable_update()

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=device)
    trainer = chainer.training.Trainer(
        updater, (args.epoch, 'epoch'), out=args.out)

    # lr scheduler: warm up at 0.1x for the first ~2000 global
    # iterations, full lr until the cooldown epoch, then 0.1x again.
    @make_shift('lr')
    def lr_scheduler(trainer):
        base_lr = args.lr

        iteration = trainer.updater.iteration
        epoch = trainer.updater.epoch
        if (iteration * comm.size) < 2000:
            rate = 0.1
        elif epoch < args.cooldown_epoch:
            rate = 1
        else:
            rate = 0.1
        return rate * base_lr

    trainer.extend(lr_scheduler)

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        trainer.extend(
            extensions.snapshot_object(
                model.fcis, filename='snapshot_model.npz'),
            trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        report_items = [
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map/iou=0.50:0.95/area=all/max_dets=100',
        ]
        trainer.extend(extensions.PrintReport(report_items),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(['main/loss'], file_name='loss.png',
                                      trigger=plot_interval),
                trigger=plot_interval)

        # Evaluate at cooldown and final epochs (in iterations).
        trainer.extend(InstanceSegmentationCOCOEvaluator(
            test_iter, model.fcis,
            label_names=coco_instance_segmentation_label_names),
            trigger=ManualScheduleTrigger([
                len(train_dataset) * args.cooldown_epoch,
                len(train_dataset) * args.epoch
            ], 'iteration'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Multi-GPU SSD training on VOC with adaptive loss scaling (AdaLoss)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--np', type=int, default=8)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--dtype', type=str, choices=dtypes.keys(),
                        default='float32',
                        help='Select the data type of the model')
    parser.add_argument('--model-dir', default=None, type=str,
                        help='Where to store models')
    parser.add_argument('--dataset-dir', default=None, type=str,
                        help='Where to store datasets')
    parser.add_argument('--dynamic-interval', default=None, type=int,
                        help='Interval for dynamic loss scaling')
    parser.add_argument('--init-scale', default=1, type=float,
                        help='Initial scale for ada loss')
    parser.add_argument('--loss-scale-method', default='approx_range',
                        type=str,
                        help='Method for adaptive loss scaling')
    parser.add_argument('--scale-upper-bound', default=16, type=float,
                        help='Hard upper bound for each scale factor')
    parser.add_argument('--accum-upper-bound', default=1024, type=float,
                        help='Accumulated upper bound for all scale factors')
    parser.add_argument('--update-per-n-iteration', default=1, type=int,
                        help='Update the loss scale value per n iteration')
    parser.add_argument('--snapshot-per-n-iteration', default=10000,
                        type=int,
                        help='The frequency of taking snapshots')
    parser.add_argument('--n-uf', default=1e-3, type=float)
    parser.add_argument('--nosanity-check', default=False,
                        action='store_true')
    parser.add_argument('--nouse-fp32-update', default=False,
                        action='store_true')
    parser.add_argument('--profiling', default=False, action='store_true')
    parser.add_argument('--verbose', action='store_true', default=False,
                        help='Verbose output')
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator('pure_nccl')
    device = comm.intra_rank

    # Set up workspace
    # 12 GB GPU RAM for workspace
    chainer.cuda.set_max_workspace_size(16 * 1024 * 1024 * 1024)
    chainer.global_config.cv_resize_backend = 'cv2'

    # Setup the data type
    # when initializing models as follows, their data types will be casted.
    # We have to forbid the usage of cudnn
    if args.dtype != 'float32':
        chainer.global_config.use_cudnn = 'never'
    chainer.global_config.dtype = dtypes[args.dtype]
    print('==> Setting the data type to {}'.format(args.dtype))

    if args.model_dir is not None:
        chainer.dataset.set_dataset_root(args.model_dir)
    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    model.use_preset('evaluate')

    ######################################
    # Setup model
    #######################################
    # Apply ada loss transform
    recorder = AdaLossRecorder(sample_per_n_iter=100)
    profiler = Profiler()
    sanity_checker = SanityChecker(
        check_per_n_iter=100) if not args.nosanity_check else None
    # Update the model to support AdaLoss
    # TODO: refactorize
    model_ = AdaLossScaled(
        model,
        init_scale=args.init_scale,
        cfg={
            'loss_scale_method': args.loss_scale_method,
            'scale_upper_bound': args.scale_upper_bound,
            'accum_upper_bound': args.accum_upper_bound,
            'update_per_n_iteration': args.update_per_n_iteration,
            'recorder': recorder,
            'profiler': profiler,
            'sanity_checker': sanity_checker,
            'n_uf_threshold': args.n_uf,
            # 'power_of_two': False,
        },
        transforms=[
            AdaLossTransformLinear(),
            AdaLossTransformConvolution2D(),
        ],
        verbose=args.verbose)

    if comm.rank == 0:
        print(model)

    train_chain = MultiboxTrainChain(model_, comm=comm)
    chainer.cuda.get_device_from_id(device).use()

    # to GPU
    model.coder.to_gpu()
    model.extractor.to_gpu()
    model.multibox.to_gpu()

    shared_mem = 100 * 1000 * 1000 * 4

    if args.dataset_dir is not None:
        chainer.dataset.set_dataset_root(args.dataset_dir)
    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder, model.insize, model.mean,
                  dtype=dtypes[args.dtype]))

    # Rank 0 builds indices; scatter_dataset shards them across workers.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize // comm.size,
        n_processes=8, n_prefetch=2, shared_mem=shared_mem)

    if comm.rank == 0:  # NOTE: only performed on the first device
        test = VOCBboxDataset(year='2007', split='test',
                              use_difficult=True, return_difficult=True)
        test_iter = chainer.iterators.SerialIterator(
            test, args.test_batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    if args.dtype == 'mixed16':
        if not args.nouse_fp32_update:
            print('==> Using FP32 update for dtype=mixed16')
            optimizer.use_fp32_update()  # by default use fp32 update

        # HACK: support skipping update by existing loss scaling functionality
        if args.dynamic_interval is not None:
            optimizer.loss_scaling(interval=args.dynamic_interval,
                                   scale=None)
        else:
            optimizer.loss_scaling(interval=float('inf'), scale=None)
            optimizer._loss_scale_max = 1.0  # to prevent actual loss scaling

    optimizer.setup(train_chain)
    # Biases: doubled gradient, no weight decay; everything else decays.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    # if args.dtype == 'mixed16':
    #     updater.loss_scale = 8
    iteration_interval = (args.iteration, 'iteration')

    trainer = training.Trainer(updater, iteration_interval, args.out)
    # trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr),
    #                trigger=triggers.ManualScheduleTrigger(
    #                    args.step, 'iteration'))
    if args.batchsize != 32:
        warmup_attr_ratio = 0.1
        # NOTE: this is confusing but it means n_iter
        warmup_n_epoch = 1000
        lr_shift = chainerlp.extensions.ExponentialShift(
            'lr', 0.1,
            init=args.lr * warmup_attr_ratio,
            warmup_attr_ratio=warmup_attr_ratio,
            warmup_n_epoch=warmup_n_epoch,
            schedule=args.step)
        trainer.extend(lr_shift, trigger=(1, 'iteration'))

    if comm.rank == 0:
        if not args.profiling:
            trainer.extend(DetectionVOCEvaluator(
                test_iter, model, use_07_metric=True,
                label_names=voc_bbox_label_names),
                trigger=triggers.ManualScheduleTrigger(
                    args.step + [args.iteration], 'iteration'))

        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.observe_value(
            'loss_scale',
            lambda trainer: trainer.updater.get_optimizer('main')._loss_scale),
            trigger=log_interval)

        metrics = [
            'epoch', 'iteration', 'lr',
            'main/loss', 'main/loss/loc', 'main/loss/conf',
            'validation/main/map'
        ]
        if args.dynamic_interval is not None:
            metrics.insert(2, 'loss_scale')
        trainer.extend(extensions.PrintReport(metrics),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(),
                       trigger=(args.snapshot_per_n_iteration, 'iteration'))
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
            trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    hook = AdaLossMonitor(sample_per_n_iter=100,
                          verbose=args.verbose,
                          includes=['Grad', 'Deconvolution'])
    recorder.trainer = trainer
    hook.trainer = trainer

    # Only rank 0 installs the monitoring hook around the run.
    with ExitStack() as stack:
        if comm.rank == 0:
            stack.enter_context(hook)
        trainer.run()

    # store recorded results
    if comm.rank == 0:  # NOTE: only export in the first rank
        recorder.export().to_csv(os.path.join(args.out, 'loss_scale.csv'))
        profiler.export().to_csv(os.path.join(args.out, 'profile.csv'))
        if sanity_checker:
            sanity_checker.export().to_csv(
                os.path.join(args.out, 'sanity_check.csv'))
        hook.export_history().to_csv(
            os.path.join(args.out, 'grad_stats.csv'))
def get_sbd_augmented_voc():
    """Return the VOC 'aug' split (img/bbox/label keys) joined with SBD boxes."""
    voc_subset = VOCSemanticSegmentationWithBboxDataset(
        split='aug').slice[:, ['img', 'bbox', 'label']]
    return ConcatenatedDataset(voc_subset, SBDBboxDataset())
def test_invalid_keys(self):
    """Datasets whose key tuples differ cannot be concatenated."""
    first = self.dataset.slice[:, ('item0', 'item1')]
    second = self.dataset.slice[:, ('item0', 'item2')]
    with self.assertRaises(ValueError):
        ConcatenatedDataset(first, second)
def test_empty(self):
    """Concatenating zero datasets is rejected with ValueError."""
    with self.assertRaises(ValueError):
        ConcatenatedDataset()
def main():
    """Multi-GPU (ChainerMN) Light-Head R-CNN training on COCO 2014."""
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: LightHeadRCNN')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=1234)
    parser.add_argument('--batch-size', '-b', type=int, default=2)
    args = parser.parse_args()

    # chainermn
    comm = chainermn.create_communicator('pure_nccl')
    device = comm.intra_rank

    np.random.seed(args.seed)
    random.seed(args.seed)

    # model
    light_head_rcnn = LightHeadRCNNResNet101(
        pretrained_model='imagenet',
        n_fg_class=len(coco_bbox_label_names))
    light_head_rcnn.use_preset('evaluate')
    model = LightHeadRCNNTrainChain(light_head_rcnn)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # train dataset
    train_dataset = COCOBboxDataset(year='2014', split='train')
    vmml_dataset = COCOBboxDataset(year='2014', split='valminusminival')

    # filter non-annotated data
    train_indices = np.array(
        [i for i, label in enumerate(train_dataset.slice[:, ['label']])
         if len(label[0]) > 0],
        dtype=np.int32)
    train_dataset = train_dataset.slice[train_indices]
    vmml_indices = np.array(
        [i for i, label in enumerate(vmml_dataset.slice[:, ['label']])
         if len(label[0]) > 0],
        dtype=np.int32)
    vmml_dataset = vmml_dataset.slice[vmml_indices]

    train_dataset = TransformDataset(
        ConcatenatedDataset(train_dataset, vmml_dataset),
        ('img', 'bbox', 'label', 'scale'),
        Transform(model.light_head_rcnn))

    # Rank 0 builds indices; scatter_dataset shards them across workers.
    if comm.rank == 0:
        indices = np.arange(len(train_dataset))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(
        train_dataset, batch_size=args.batch_size)

    # Evaluation happens only on rank 0.
    if comm.rank == 0:
        test_dataset = COCOBboxDataset(
            year='2014', split='minival',
            use_crowded=True, return_crowded=True, return_area=True)
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, batch_size=1, repeat=False, shuffle=False)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(momentum=0.9), comm)
    optimizer.setup(model)
    # Triple the gradient on every global-context-module conv.
    global_context_module = model.light_head_rcnn.head.global_context_module
    global_context_module.col_max.W.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.col_max.b.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.col.W.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.col.b.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.row_max.W.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.row_max.b.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.row.W.update_rule.add_hook(GradientScaling(3.0))
    global_context_module.row.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0001))

    # Freeze BN affine parameters and the backbone's first stages.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    model.light_head_rcnn.extractor.conv1.disable_update()
    model.light_head_rcnn.extractor.res2.disable_update()

    converter = functools.partial(
        concat_examples, padding=0,
        # img, bboxes, labels, scales
        indices_concat=[0, 2, 3],  # img, _, labels, scales
        indices_to_device=[0],  # img
    )

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    trainer = chainer.training.Trainer(
        updater, (30, 'epoch'), out=args.out)

    # lr scheduler: linear warm-up for 500 iterations, then step decay
    # at epochs 20 and 26; base lr scales with global batch size.
    @make_shift('lr')
    def lr_scheduler(trainer):
        base_lr = 0.0005 * 1.25 * args.batch_size * comm.size
        warm_up_duration = 500
        warm_up_rate = 1 / 3

        iteration = trainer.updater.iteration
        epoch = trainer.updater.epoch
        if iteration < warm_up_duration:
            rate = warm_up_rate \
                + (1 - warm_up_rate) * iteration / warm_up_duration
        elif epoch < 20:
            rate = 1
        elif epoch < 26:
            rate = 0.1
        else:
            rate = 0.01
        return rate * base_lr

    trainer.extend(lr_scheduler)

    if comm.rank == 0:
        # interval
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # training extensions
        model_name = model.light_head_rcnn.__class__.__name__

        trainer.extend(chainer.training.extensions.snapshot_object(
            model.light_head_rcnn,
            savefun=chainer.serializers.save_npz,
            filename='%s_model_iter_{.updater.iteration}.npz' % model_name),
            trigger=(1, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        report_items = [
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'validation/main/map/iou=0.50:0.95/area=all/max_dets=100',
        ]
        trainer.extend(extensions.PrintReport(report_items),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(
                extensions.PlotReport(['main/loss'], file_name='loss.png',
                                      trigger=plot_interval),
                trigger=plot_interval)

        # Evaluate at the lr-decay epochs.
        trainer.extend(DetectionCOCOEvaluator(
            test_iter, model.light_head_rcnn,
            label_names=coco_bbox_label_names),
            trigger=ManualScheduleTrigger([20, 26], 'epoch'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()