def setup_dataset(cfg, split):
    """Return the detection dataset configured for *split*.

    Args:
        cfg: config object with ``cfg.dataset.train`` / ``cfg.dataset.eval``
            naming the dataset type ('COCO' or 'VOC').
        split: either 'train' or 'eval'.

    Raises:
        ValueError: on an unknown split or dataset type.
    """
    if split == 'train':
        dataset_type = cfg.dataset.train
    elif split == 'eval':
        dataset_type = cfg.dataset.eval
    else:
        raise ValueError()

    if dataset_type == 'COCO':
        if split == 'train':
            return COCOBboxDataset(split='train', year='2017')
        if split == 'eval':
            # Evaluation needs crowd/area annotations for COCO-style metrics.
            return COCOBboxDataset(
                split='val', year='2017',
                use_crowded=True, return_area=True, return_crowded=True)
        raise ValueError()

    if dataset_type == 'VOC':
        if split == 'train':
            # Standard VOC07+12 trainval union.
            return ConcatenatedDataset(
                VOCBboxDataset(year='2007', split='trainval'),
                VOCBboxDataset(year='2012', split='trainval'))
        if split == 'eval':
            return VOCBboxDataset(
                split='test', year='2007',
                use_difficult=True, return_difficult=True)
        raise ValueError()

    raise ValueError()
def _wrapper(self, to_double=False, bit=10, mulaw=True):
    """Train a small WaveRNN on synthetic sine waves and check the NLL drops.

    Builds one SignWaveDataset per speaker (distinct frequency per speaker),
    trains via the shared ``setup_support``/``train_support`` helpers, asserts
    the reported NLL starts above and ends below a reference value, then saves
    the trained predictor to /tmp.

    NOTE(review): relies on module-level names not visible in this chunk
    (``sampling_rate``, ``sampling_length``, ``batch_size``, ``gpu``,
    ``iteration``) — confirm against the full test module.
    """
    speaker_size = 4
    model = _create_model(
        local_size=0,
        speaker_size=speaker_size,
    )
    # One dataset per speaker at (i + 1) * 110 Hz so speakers differ.
    datasets = [
        SignWaveDataset(
            sampling_rate=sampling_rate,
            sampling_length=sampling_length,
            to_double=to_double,
            bit=bit,
            mulaw=mulaw,
            frequency=(i + 1) * 110,
        )
        for i in range(speaker_size)
    ]
    # Speaker id i is repeated once per sample of dataset i, aligning the
    # flat speaker_nums list with the concatenated wave dataset.
    dataset = SpeakerWavesDataset(
        wave_dataset=ConcatenatedDataset(*datasets),
        speaker_nums=list(
            chain.from_iterable([i] * len(d) for i, d in enumerate(datasets))),
    )
    updater, reporter = setup_support(batch_size, gpu, model, dataset)
    trained_nll = _get_trained_nll()

    def _first_hook(o):
        # Before training: NLL should exceed the trained reference value.
        self.assertTrue(o['main/nll_coarse'].data > trained_nll)
        if to_double:
            self.assertTrue(o['main/nll_fine'].data > trained_nll)

    def _last_hook(o):
        # After training: NLL should fall below the trained reference value.
        self.assertTrue(o['main/nll_coarse'].data < trained_nll)
        if to_double:
            self.assertTrue(o['main/nll_fine'].data < trained_nll)

    train_support(iteration, reporter, updater, _first_hook, _last_hook)

    # save model
    serializers.save_npz(
        '/tmp/'
        f'test_training_wavernn'
        f'-to_double={to_double}'
        f'-bit={bit}'
        f'-mulaw={mulaw}'
        f'-speaker_size={speaker_size}'
        f'-iteration={iteration}.npz',
        model.predictor,
    )
def load_dataset(config):
    """Instantiate the training (and optional validation) datasets.

    Each config section names a module, a loader attribute on it, and kwargs.
    When a 'train2' section exists, its dataset is concatenated onto 'train'.

    Returns:
        tuple: (train_data, test_data) where test_data is None if no 'valid'
        section is present.
    """
    def _build(section):
        # Resolve the loader callable named by the section and invoke it.
        loader_cls = get_class(section['module'])
        loader = getattr(loader_cls, section['name'])
        return loader(**section['args'])

    train_data = _build(config['train'])

    if parse_dict(config, 'train2', None):
        train_data = ConcatenatedDataset(train_data, _build(config['train2']))

    test_data = None
    if parse_dict(config, 'valid', None):
        test_data = _build(config['valid'])

    return train_data, test_data
def Dataset(fns, for_evaluate=False):
    """Assemble the waves dataset for the given file names.

    Closes over outer-scope names (``wave_paths``, ``silence_paths``,
    ``local_paths``, ``config``, ``speaker_nums``).  Training uses the
    encoded WavesDataset; evaluation uses the non-encoded variant repeated
    ``config.num_times_evaluate`` times.
    """
    inputs = [
        LazyInput(
            path_wave=wave_paths[fn],
            path_silence=silence_paths[fn],
            path_local=local_paths[fn],
        )
        for fn in fns
    ]

    if for_evaluate:
        dataset = NonEncodeWavesDataset(
            inputs=inputs,
            time_length=config.time_length_evaluate,
            local_padding_time_length=config.
            local_padding_time_length_evaluate,
        )
    else:
        dataset = WavesDataset(
            inputs=inputs,
            sampling_length=config.sampling_length,
            to_double=not config.only_coarse,
            bit=config.bit_size,
            mulaw=config.mulaw,
            local_padding_size=config.local_padding_size,
            gaussian_noise_sigma=config.gaussian_noise_sigma,
        )

    if speaker_nums is not None:
        # Attach a per-sample speaker id aligned with fns.
        dataset = SpeakerWavesDataset(
            wave_dataset=dataset,
            speaker_nums=[speaker_nums[fn] for fn in fns],
        )

    if for_evaluate:
        # Repeat the evaluation set so each sample is evaluated several times.
        dataset = ConcatenatedDataset(*([dataset] * config.num_times_evaluate))

    return dataset
def main():
    """Train SSD300/SSD512 on VOC07+12 trainval; evaluate on VOC07 test."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # argparse `choices` guarantees one of the two branches is taken.
    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test = VOCBboxDataset(year='2007', split='test',
                          use_difficult=True, return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            # SSD recipe: biases get doubled gradients instead of decay.
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step, 'iteration'))
    # Evaluate at each lr-step boundary and at the final iteration.
    trainer.extend(DetectionVOCEvaluator(test_iter, model, use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def setUp(self):
    """Build the concatenated dataset under test and a flat reference copy."""
    self.concatenated_dataset = ConcatenatedDataset(*self.datasets)
    # Flatten every member dataset in order; this is what concatenation
    # should be equivalent to.
    flattened = []
    for member in self.datasets:
        flattened.extend(member)
    self.expected_dataset = flattened
# Right halves of VOC2007/2012 trainval (difficult boxes included in the
# annotations, but the difficult flag itself is not returned).
train07 = VOCBboxDataset(data_dir='auto', year='2007', split='trainval',
                         use_difficult=True, return_difficult=False)
train12 = VOCBboxDataset(data_dir='auto', year='2012', split='trainval',
                         use_difficult=True, return_difficult=False)
train07_right = train07[len(train07) // 2:]
train12_right = train12[len(train12) // 2:]
train_right = ConcatenatedDataset(train07_right, train12_right)
train = TransformDataset(train_right,
                         _Transform(model.coder, model.insize, model.mean))
train_iter = chainer.iterators.SerialIterator(train, batch_size,
                                              repeat=True, shuffle=True)
test = VOCBboxDataset(data_dir='auto', year='2007', split='test',
                      use_difficult=True, return_difficult=True)
# NOTE(review): the statement below is truncated in this chunk — the
# SerialIterator call continues past the visible source.
test_iter = chainer.iterators.SerialIterator(test, batch_size, repeat=False,
def main():
    """Train SSD300/SSD512 on VOC with adaptive loss scaling (AdaLoss).

    Wraps the model in AdaLossScaled, trains on VOC07+12 trainval, evaluates
    on VOC07 test, and exports loss-scale / profiling / sanity-check /
    gradient-statistics CSVs to the output directory.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--dtype', type=str, choices=dtypes.keys(),
                        default='float32',
                        help='Select the data type of the model')
    parser.add_argument('--model-dir', default=None, type=str,
                        help='Where to store models')
    parser.add_argument('--dataset-dir', default=None, type=str,
                        help='Where to store datasets')
    parser.add_argument('--dynamic-interval', default=None, type=int,
                        help='Interval for dynamic loss scaling')
    parser.add_argument('--init-scale', default=1, type=float,
                        help='Initial scale for ada loss')
    parser.add_argument('--loss-scale-method', default='approx_range',
                        type=str, help='Method for adaptive loss scaling')
    parser.add_argument('--scale-upper-bound', default=32800, type=float,
                        help='Hard upper bound for each scale factor')
    parser.add_argument('--accum-upper-bound', default=32800, type=float,
                        help='Accumulated upper bound for all scale factors')
    parser.add_argument('--update-per-n-iteration', default=100, type=int,
                        help='Update the loss scale value per n iteration')
    parser.add_argument('--snapshot-per-n-iteration', default=10000, type=int,
                        help='The frequency of taking snapshots')
    parser.add_argument('--n-uf', default=1e-3, type=float)
    parser.add_argument('--nosanity-check', default=False,
                        action='store_true')
    parser.add_argument('--nouse-fp32-update', default=False,
                        action='store_true')
    parser.add_argument('--profiling', default=False, action='store_true')
    parser.add_argument('--verbose', action='store_true', default=False,
                        help='Verbose output')
    args = parser.parse_args()

    # Setting data types
    if args.dtype != 'float32':
        chainer.global_config.use_cudnn = 'never'
    chainer.global_config.dtype = dtypes[args.dtype]
    print('==> Setting the data type to {}'.format(args.dtype))

    # Initialize model
    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    model.use_preset('evaluate')

    # Apply adaptive loss scaling
    recorder = AdaLossRecorder(sample_per_n_iter=100)
    profiler = Profiler()
    sanity_checker = SanityChecker(
        check_per_n_iter=100) if not args.nosanity_check else None
    # Update the model to support AdaLoss
    # TODO: refactorize
    model_ = AdaLossScaled(
        model,
        init_scale=args.init_scale,
        cfg={
            'loss_scale_method': args.loss_scale_method,
            'scale_upper_bound': args.scale_upper_bound,
            'accum_upper_bound': args.accum_upper_bound,
            'update_per_n_iteration': args.update_per_n_iteration,
            'recorder': recorder,
            'profiler': profiler,
            'sanity_checker': sanity_checker,
            'n_uf_threshold': args.n_uf,
        },
        transforms=[
            AdaLossTransformLinear(),
            AdaLossTransformConvolution2D(),
        ],
        verbose=args.verbose)

    # Finalize the model
    train_chain = MultiboxTrainChain(model_)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        cp.random.seed(0)
        # NOTE: we have to transfer modules explicitly to GPU
        model.coder.to_gpu()
        model.extractor.to_gpu()
        model.multibox.to_gpu()

    # Prepare dataset
    if args.model_dir is not None:
        chainer.dataset.set_dataset_root(args.model_dir)
    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        Transform(model.coder, model.insize, model.mean,
                  dtype=dtypes[args.dtype]))
    # train_iter = chainer.iterators.MultiprocessIterator(
    #     train, args.batchsize)  # , n_processes=8, n_prefetch=2
    train_iter = chainer.iterators.MultithreadIterator(train, args.batchsize)
    # train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    test = VOCBboxDataset(year='2007', split='test',
                          use_difficult=True, return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(test, args.test_batchsize,
                                                 repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    if args.dtype == 'mixed16':
        if not args.nouse_fp32_update:
            print('==> Using FP32 update for dtype=mixed16')
            optimizer.use_fp32_update()  # by default use fp32 update
        # HACK: support skipping update by existing loss scaling functionality
        if args.dynamic_interval is not None:
            optimizer.loss_scaling(interval=args.dynamic_interval, scale=None)
        else:
            optimizer.loss_scaling(interval=float('inf'), scale=None)
            optimizer._loss_scale_max = 1.0  # to prevent actual loss scaling

    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            # SSD recipe: biases get doubled gradients instead of decay.
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step, 'iteration'))
    trainer.extend(DetectionVOCEvaluator(test_iter, model, use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.observe_value(
        'loss_scale',
        lambda trainer: trainer.updater.get_optimizer('main')._loss_scale),
        trigger=log_interval)

    metrics = [
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]
    if args.dynamic_interval is not None:
        metrics.insert(2, 'loss_scale')
    trainer.extend(extensions.PrintReport(metrics), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    hook = AdaLossMonitor(sample_per_n_iter=100,
                          verbose=args.verbose,
                          includes=['Grad', 'Deconvolution'])
    recorder.trainer = trainer
    hook.trainer = trainer

    # Keep the monitor hook installed for the whole training run.
    with ExitStack() as stack:
        stack.enter_context(hook)
        trainer.run()

    recorder.export().to_csv(os.path.join(args.out, 'loss_scale.csv'))
    profiler.export().to_csv(os.path.join(args.out, 'profile.csv'))
    if sanity_checker:
        sanity_checker.export().to_csv(
            os.path.join(args.out, 'sanity_check.csv'))
    hook.export_history().to_csv(os.path.join(args.out, 'grad_stats.csv'))
def main():
    """Train a CenterNet (Hourglass) detector on VOC07+12 across GPUs."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpus', type=str, default="-1")
    parser.add_argument('--batchsize', type=int, default=2)
    parser.add_argument('--epoch', type=int, default=10)
    parser.add_argument('--mini', action="store_true")
    args = parser.parse_args()

    # Keep only non-negative GPU ids; the default "-1" yields an empty list.
    # NOTE(review): with no GPUs this crashes later (gpus[0] and division by
    # len(gpus)) — confirm callers always pass at least one GPU id.
    gpus = list(filter(lambda x: x >= 0, map(int, args.gpus.split(","))))

    num_class = len(voc_bbox_label_names)
    data_augmentation_transform = DataAugmentationTransform(512)
    center_detection_transform = CenterDetectionTransform(512, num_class, 4)
    train = TransformDataset(
        ConcatenatedDataset(
            VOCBboxDataset(year='2007', split='trainval'),
            VOCBboxDataset(year='2012', split='trainval')
        ),
        data_augmentation_transform
    )
    train = TransformDataset(train, center_detection_transform)
    if args.mini:
        # Tiny subset for smoke testing.
        train = datasets.SubDataset(train, 0, 100)
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test = VOCBboxDataset(
        year='2007', split='test',
        use_difficult=True, return_difficult=True)
    if args.mini:
        test = datasets.SubDataset(test, 0, 20)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize // len(gpus), repeat=False, shuffle=False)

    detector = CenterDetector(HourglassNet, 512, num_class)
    train_chain = CenterDetectorTrain(detector, 1, 0.1, 1)

    # First GPU hosts the master copy; the rest get device entries below.
    gpus.sort()
    first_gpu = gpus[0]
    remain_gpu = gpus[1:]
    train_chain.to_gpu(first_gpu)

    optimizer = Adam(amsgrad=True)
    optimizer.setup(train_chain)

    devices = {
        "main": first_gpu
    }
    for i, gpu in enumerate(remain_gpu):
        devices[f"{i + 2}"] = gpu

    updater = training.updaters.ParallelUpdater(
        train_iter,
        optimizer,
        devices=devices,
    )

    log_interval = 1, 'epoch'
    trainer = Trainer(updater, (args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        [
            'epoch', 'iteration', 'lr',
            'main/loss', 'main/hm_loss', 'main/wh_loss', 'main/offset_loss',
            'validation/main/map',
        ]),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, detector, use_07_metric=True,
            label_names=voc_bbox_label_names),
        trigger=(1, 'epoch'))
    trainer.extend(
        extensions.snapshot_object(detector,
                                   'detector{.updater.epoch:03}.npz'),
        trigger=(1, 'epoch')
    )

    trainer.run()
def main():
    """Train FPN-SSD / SSD300 / SSD512 on VOC07+12 or COCO."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', choices=('fpn', 'ssd300', 'ssd512'), default='fpn')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--data_dir', type=str, default='auto')
    parser.add_argument('--dataset', choices=['voc', 'coco'], default='voc')
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--init_scale', type=float, default=1e-2)
    parser.add_argument('--resume')
    args = parser.parse_args()

    if args.dataset == 'voc':
        # Explicit data_dir only when the user overrides 'auto'.
        train = ConcatenatedDataset(
            VOCBboxDataset(
                year='2007', split='trainval',
                data_dir=join(args.data_dir, 'VOCdevkit/VOC2007')
                if args.data_dir != 'auto' else args.data_dir),
            VOCBboxDataset(
                year='2012', split='trainval',
                data_dir=join(args.data_dir, 'VOCdevkit/VOC2012')
                if args.data_dir != 'auto' else args.data_dir))
        test = VOCBboxDataset(
            year='2007', split='test',
            use_difficult=True, return_difficult=True,
            data_dir=join(args.data_dir, 'VOCdevkit/VOC2007')
            if args.data_dir != 'auto' else args.data_dir)
        label_names = voc_bbox_label_names
    elif args.dataset == 'coco':
        # todo: use train+valminusminival(=coco2017train)
        # https://github.com/chainer/chainercv/issues/651
        train = COCOBboxDataset(data_dir=args.data_dir, split='train')
        test = COCOBboxDataset(data_dir=args.data_dir, split='val')
        label_names = coco_bbox_label_names

    if args.model == 'ssd300':
        model = SSD300(
            n_fg_class=len(label_names), pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(
            n_fg_class=len(label_names), pretrained_model='imagenet')
    elif args.model == 'fpn':
        model = FPNSSD(
            n_fg_class=len(label_names),
            pretrained_model='imagenet',
            init_scale=args.init_scale)

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        train, Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultithreadIterator(train, args.batchsize)

    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            # SSD recipe: biases get doubled gradients instead of decay.
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=args.lr),
        trigger=triggers.ManualScheduleTrigger([80000, 100000], 'iteration'))
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model, use_07_metric=True, label_names=label_names),
        trigger=(10000, 'iteration'))

    log_interval = 100, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'lr',
            'main/loss', 'main/loss/loc', 'main/loss/conf',
            'validation/main/map'
        ]),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=(120000, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train a CenterNet (Hourglass) detector on VOC07+12 on one GPU."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=2)
    parser.add_argument('--epoch', type=int, default=10)
    parser.add_argument('--mini', action="store_true")
    parser.add_argument('--input_size', type=int, default=512)
    args = parser.parse_args()

    dtype = np.float32
    num_class = len(voc_bbox_label_names)
    data_augmentation_transform = DataAugmentationTransform(args.input_size)
    center_detection_transform = CenterDetectionTransform(args.input_size,
                                                          num_class, 4,
                                                          dtype=dtype)
    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        data_augmentation_transform)
    train = TransformDataset(train, center_detection_transform)
    if args.mini:
        # Tiny subset for smoke testing.
        train = datasets.SubDataset(train, 0, 100)
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test = VOCBboxDataset(year='2007', split='test',
                          use_difficult=True, return_difficult=True)
    if args.mini:
        test = datasets.SubDataset(test, 0, 20)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    detector = CenterDetector(HourglassNet, args.input_size, num_class,
                              dtype=dtype)
    #detector = CenterDetector(SimpleCNN, args.input_size, num_class)
    train_chain = CenterDetectorTrain(detector, 1, 0.1, 1)
    #train_chain = CenterDetectorTrain(detector, 1, 0, 0)

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        train_chain.to_gpu(args.gpu)

    optimizer = Adam(alpha=1.25e-4)
    #optimizer = SGD()
    optimizer.setup(train_chain)

    updater = StandardUpdater(train_iter, optimizer, device=args.gpu)

    log_interval = 1, 'epoch'
    log_interval_mini = 500, 'iteration'
    trainer = Trainer(updater, (args.epoch, 'epoch'), out=f"result{args.gpu}")
    trainer.extend(extensions.LogReport(trigger=log_interval_mini))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr',
        'main/loss', 'main/hm_loss', 'main/wh_loss', 'main/offset_loss',
        'main/hm_mae', 'main/hm_pos_loss', 'main/hm_neg_loss',
        'validation/main/map',
    ]), trigger=log_interval_mini)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(DetectionVOCEvaluator(test_iter, detector,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=(1, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        detector, 'detector{.updater.epoch:03}.npz'),
        trigger=(1, 'epoch'))

    trainer.run()
return bboxes, labels, scores model.predict_each_box = predict_each_box.__get__(model) train07 = VOCBboxDataset(data_dir='auto', year='2007', split='trainval', use_difficult=True, return_difficult=False) train12 = VOCBboxDataset(data_dir='auto', year='2012', split='trainval', use_difficult=True, return_difficult=False) train = ConcatenatedDataset(train07, train12) test = VOCBboxDataset(data_dir='auto', year='2007', split='test', use_difficult=True, return_difficult=True) test_iter = chainer.iterators.SerialIterator(test, batch_size, repeat=False, shuffle=False) print('image prepare') if subset == 'left': train07_left = train07[:len(train07) // 2] test_left = test[:len(test) // 2]
def main():
    """Fine-tune SSD300 on the INRIA person dataset from a local checkpoint."""
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', choices=('ssd300', 'ssd512'), default='ssd300')
    parser.add_argument('--batchsize', type=int, default=1)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # NOTE(review): --model is parsed but ignored; SSD300 is always built.
    model = SSD300(
        n_fg_class=len(inria_bbox_label_names),
        pretrained_model='./ssd_vgg16_imagenet_2017_06_09.npz')
    print("###n_fg_class= ", len(inria_bbox_label_names))
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # ConcatenatedDataset with a single member is effectively a no-op wrapper.
    train = TransformDataset(
        ConcatenatedDataset(
            INRIABboxDataset(data_dir='../INRIAPerson', split='Train')
        ),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    test = INRIABboxDataset(data_dir='../INRIAPerson', split='Test')
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            # SSD recipe: biases get doubled gradients instead of decay.
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-4),
        trigger=triggers.ManualScheduleTrigger([80000, 100000], 'iteration'))
    # NOTE(review): evaluating every single iteration is extremely slow —
    # confirm the (1, 'iteration') trigger is intentional (debug setting?).
    trainer.extend(
        DetectionINRIAEvaluator(
            test_iter, model, use_07_metric=False,
            label_names=inria_bbox_label_names),
        trigger=(1, 'iteration'))

    log_interval = 1, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'lr',
         'main/loss', 'main/loss/loc', 'main/loss/conf',
         'validation/main/map']),
        trigger=log_interval)
    #trainer.extend(extensions.ProgressBar(update_interval=1))

    trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=(120000, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train a CenterNet (Hourglass) detector on VOC07+12 with ChainerMN.

    Data-parallel training over a pure-NCCL communicator.  Rank 0 prepares
    the datasets (scattered to other ranks) and owns all reporting,
    evaluation, and snapshot extensions.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--batchsize', type=int, default=4)
    parser.add_argument('--epoch', type=int, default=10)
    parser.add_argument('--mini', action="store_true")
    args = parser.parse_args()

    # Switch to the forkserver start method and warm it up with a dummy
    # process before the NCCL communicator (and CUDA) is initialized.
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator('pure_nccl')
    print(comm.size)
    device = comm.intra_rank

    num_class = len(voc_bbox_label_names)
    data_augmentation_transform = DataAugmentationTransform(512)
    center_detection_transform = CenterDetectionTransform(512, num_class, 4)
    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        data_augmentation_transform)
    # Only rank 0 finishes building the dataset; it is then scattered.
    if comm.rank == 0:
        train = TransformDataset(train, center_detection_transform)
        if args.mini:
            train = datasets.SubDataset(train, 0, 100)
    else:
        train = None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize // comm.size, n_processes=2)

    if comm.rank == 0:
        test = VOCBboxDataset(year='2007', split='test',
                              use_difficult=True, return_difficult=True)
        if args.mini:
            test = datasets.SubDataset(test, 0, 20)
        test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                     repeat=False,
                                                     shuffle=False)

    detector = CenterDetector(HourglassNet, 512, num_class)
    train_chain = CenterDetectorTrain(detector, 1, 0.1, 1, comm=comm)

    chainer.cuda.get_device_from_id(device).use()
    train_chain.to_gpu()

    optimizer = chainermn.create_multi_node_optimizer(Adam(amsgrad=True),
                                                      comm)
    optimizer.setup(train_chain)

    updater = StandardUpdater(train_iter, optimizer, device=device)
    trainer = Trainer(updater, (args.epoch, 'epoch'))

    if comm.rank == 0:
        log_interval = 1, 'epoch'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr',
            'main/loss', 'main/hm_loss', 'main/wh_loss', 'main/offset_loss',
            'validation/main/map',
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.extend(DetectionVOCEvaluator(test_iter, detector,
                                             use_07_metric=True,
                                             label_names=voc_bbox_label_names),
                       trigger=(1, 'epoch'))
        # BUG FIX: the filename previously used '{.updator.epoch:03}' —
        # Trainer has an 'updater' attribute, so the old format string raised
        # AttributeError at the first snapshot.  Now matches the single-GPU
        # script's 'detector{.updater.epoch:03}.npz'.
        trainer.extend(extensions.snapshot_object(
            detector, 'detector{.updater.epoch:03}.npz'),
            trigger=(1, 'epoch'))

    trainer.run()
args = parser.parse_args() # TODO: small_lr_layers chainer.cuda.set_max_workspace_size(512 * 1024 * 1024) chainer.config.autotune = True chainer.config.cudnn_fast_batch_normalization = True model = SSD300(n_fg_class=20, pretrained_model='imagenet') model.use_preset('evaluate') net = SSDClassifier(model, alpha=1, k=3) chainer.cuda.get_device_from_id(0).use() model.to_gpu() # Data Augmentation train = TransformDataset( ConcatenatedDataset( VOCBboxDataset(year='2007', split='trainval'), VOCBboxDataset(year='2012', split='trainval') ), Transform(model.coder, model.insize, model.mean) ) test = VOCBboxDataset(year='2007', split='test', use_difficult=True, return_difficult=True) train_iter = iterators.SerialIterator(train, args.batchsize) test_iter = iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False) # Evaluator evaluator = DetectionVOCEvaluator(test_iter, net.predictor, use_07_metric=True, label_names=voc_bbox_label_names) trainer = create_trainer(train_iter, net, args.gpu_id, args.initial_lr, args.weight_decay, args.freeze_layer, args.small_lr_layers, args.small_initial_lr, args.num_epochs_or_iter, args.epoch_or_iter, args.save_dir)
def handler(context):
    """Platform training handler: train SSD on VOC datasets fetched via API.

    Reads dataset ids from ``context.datasets``, downloads the samples, and
    runs the standard SSD training loop.  Relies on module-level settings
    (``network_model``, ``USE_GPU``, ``BATCHSIZE``, ``nb_iterations``,
    ``log_path``, ``ABEJA_TRAINING_RESULT_DIR``) defined elsewhere in this
    file.
    """
    dataset_alias = context.datasets
    trainval_2007_dataset_id = dataset_alias['trainval2007']
    trainval_2012_dataset_id = dataset_alias['trainval2012']
    test_2007_dataset_id = dataset_alias['test2007']
    # Materialize the remote datasets up front.
    trainval_2007_dataset = list(
        load_dataset_from_api(trainval_2007_dataset_id))
    trainval_2012_dataset = list(
        load_dataset_from_api(trainval_2012_dataset_id))
    test_2007_dataset = list(load_dataset_from_api(test_2007_dataset_id))

    if network_model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif network_model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if USE_GPU >= 0:
        chainer.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()

    trainval_2007 = DetectionDatasetFromAPI(trainval_2007_dataset)
    trainval_2012 = DetectionDatasetFromAPI(trainval_2012_dataset)
    test_2007 = DetectionDatasetFromAPI(test_2007_dataset,
                                        use_difficult=True,
                                        return_difficult=True)

    train = TransformDataset(
        ConcatenatedDataset(trainval_2007, trainval_2012),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train, BATCHSIZE)
    test_iter = chainer.iterators.SerialIterator(test_2007, BATCHSIZE,
                                                 repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            # SSD recipe: biases get doubled gradients instead of decay.
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=USE_GPU)
    trainer = training.Trainer(updater, (nb_iterations, 'iteration'),
                               out=ABEJA_TRAINING_RESULT_DIR)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([80000, 100000],
                                                          'iteration'))
    trainer.extend(DetectionVOCEvaluator(test_iter, model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=(10000, 'iteration'))

    log_interval = 100, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)

    print_entries = [
        'iteration', 'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]
    report_entries = [
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]
    trainer.extend(Statistics(report_entries, nb_iterations,
                              obs_key='iteration'),
                   trigger=log_interval)
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(print_entries),
                   trigger=log_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(nb_iterations, 'iteration'))

    trainer.run()
def main():
    """Command-line entry point: train Faster R-CNN (VGG16 backbone) on
    Pascal VOC and evaluate on the VOC2007 test split.

    Flags: --dataset (voc07 or voc0712), --gpu, --lr, --out, --seed,
    --step_size (lr-drop iteration), --iteration (total iterations).
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--dataset', choices=('voc07', 'voc0712'),
                        help='The dataset to use: VOC07, VOC07+12',
                        default='voc07')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    # argparse `choices` guarantees exactly one of these branches runs.
    if args.dataset == 'voc07':
        train_data = VOCBboxDataset(split='trainval', year='2007')
    elif args.dataset == 'voc0712':
        train_data = ConcatenatedDataset(
            VOCBboxDataset(year='2007', split='trainval'),
            VOCBboxDataset(year='2012', split='trainval'))
    # Difficult objects are included (and flagged) for evaluation.
    test_data = VOCBboxDataset(split='test', year='2007',
                               use_difficult=True, return_difficult=True)

    faster_rcnn = FasterRCNNVGG16(n_fg_class=len(voc_bbox_label_names),
                                  pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(rate=0.0005))

    train_data = TransformDataset(train_data, Transform(faster_rcnn))

    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)
    updater = chainer.training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    # Keep only the detector weights in the final snapshot.
    trainer.extend(
        extensions.snapshot_object(model.faster_rcnn, 'snapshot_model.npz'),
        trigger=(args.iteration, 'iteration'))
    # Drop the learning rate by 10x at step_size.
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'iteration', 'epoch', 'elapsed_time', 'lr',
        'main/loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
        'validation/main/map',
    ]), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss'],
                                  file_name='loss.png',
                                  trigger=plot_interval),
            trigger=plot_interval)

    # Evaluate at the lr-drop point and at the end of training.
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model.faster_rcnn, use_07_metric=True,
            label_names=voc_bbox_label_names),
        trigger=ManualScheduleTrigger(
            [args.step_size, args.iteration], 'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
# Script-level training setup: SSD300 fed with the first half of the
# VOC2007+2012 trainval splits, evaluated on the full VOC2007 test split.
batch_size = 8

# NOTE(review): n_fg_class=21 — VOC has 20 foreground classes, so 21 looks
# like it also counts background; confirm against the label set used
# downstream.
model = SSD300(
    n_fg_class=21,
    pretrained_model='imagenet')
model.to_gpu(gpu_id)

# Difficult annotations are included in training but not returned as flags.
train07 = VOCBboxDataset(data_dir='auto', year='2007', split='trainval',
                         use_difficult=True, return_difficult=False)
train12 = VOCBboxDataset(data_dir='auto', year='2012', split='trainval',
                         use_difficult=True, return_difficult=False)
# Keep only the first half of each trainval split for training.
train07_left = train07[:len(train07) // 2]
train12_left = train12[:len(train12) // 2]
train_left = ConcatenatedDataset(train07_left, train12_left)
train = TransformDataset(train_left,
                         _Transform(model.coder, model.insize, model.mean))
train_iter = chainer.iterators.SerialIterator(train, batch_size,
                                              repeat=True, shuffle=True)

# Evaluation uses the full test split with difficult flags returned.
test = VOCBboxDataset(data_dir='auto', year='2007', split='test',
                      use_difficult=True, return_difficult=True)
test_iter = chainer.iterators.SerialIterator(test, batch_size,
                                             repeat=False, shuffle=False)


class MultiboxTrainChain(chainer.Chain):
    """Chain wrapping an SSD model for training.

    `alpha` and `k` are loss hyper-parameters — presumably the
    localization-loss weight and the hard-negative-mining ratio; confirm
    against the loss computation defined elsewhere in this class.
    """

    def __init__(self, model, alpha=1, k=3):
        super(MultiboxTrainChain, self).__init__()
        # Only the model is registered as a child link; alpha/k are plain
        # scalar attributes.
        with self.init_scope():
            self.model = model
        self.alpha = alpha
        self.k = k