def check_weight_decay(self):
    """Check that ``WeightDecay`` adds ``decay * w`` to the gradient.

    With SGD(lr=1) the expected one-step update is
    ``w <- w - (grad + decay * w)``.
    """
    # NOTE: ``w`` aliases ``self.target.param.data``; the optimizer updates
    # the parameter in place, so after ``opt.update()`` the same ``w`` array
    # holds the post-update value that is compared against ``expect``.
    w = self.target.param.data
    g = self.target.param.grad
    decay = 0.2
    # Expected value, computed from the pre-update contents of ``w``
    # (the subtraction allocates a fresh array, so it is unaffected by
    # the in-place update below).
    expect = w - g - decay * w

    opt = optimizers.SGD(lr=1)
    opt.setup(self.target)
    opt.add_hook(optimizer_hooks.WeightDecay(decay))
    opt.update()

    testing.assert_allclose(expect, w)
def _updated_array(self, backend_config, loss_scale):
    """Perform one SGD step with a 1/8 weight-decay hook and return the
    updated parameter array.

    A fresh 3-element float32 parameter ``[0, 1, 2]`` is wrapped in a
    link, moved to the device described by ``backend_config``, and
    updated once against the loss ``sum(p ** 3)`` (optionally with a
    loss scale applied during backprop).
    """
    initial = np.arange(3, dtype=np.float32)
    holder = chainer.Link()
    with holder.init_scope():
        holder.p = chainer.Parameter(initial)
    holder.to_device(backend_config.device)

    sgd = optimizers.SGD(lr=1)
    sgd.setup(holder)
    sgd.add_hook(optimizer_hooks.WeightDecay(1 / 8.))

    objective = F.sum(holder.p ** 3)
    objective.backward(loss_scale=loss_scale)
    sgd.update()

    return holder.p.array
def check_weight_decay(self, backend_configs):
    """Check ``WeightDecay`` when each parameter is placed on its own device.

    :param backend_configs: one backend configuration per parameter of
        ``self.target``; each parameter is moved to the corresponding
        device before the update.
    """
    target = self.target
    assert len(backend_configs) == len(list(target.params()))
    devices = [bc.device for bc in backend_configs]
    decay = 0.2

    # Compute expected values BEFORE transferring each parameter: with
    # SGD(lr=1) the update is ``w <- w - (grad + decay * w)``.  The
    # expectation is taken on the original device, then the parameter is
    # moved to its assigned test device.
    expects = []
    for param, device in zip(target.params(), devices):
        expects.append(param.array - param.grad - decay * param.array)
        param.to_device(device)

    opt = optimizers.SGD(lr=1)
    opt.setup(self.target)
    opt.add_hook(optimizer_hooks.WeightDecay(decay))
    opt.update()

    # Validate each updated parameter against its precomputed expectation.
    for expect, param in zip(expects, target.params()):
        testing.assert_allclose(expect, param.array)
def add_hooks(optimizer, configs): """ :param optimizer: chainer.Optimizer, chainerのオプティマイザ :param configs: pyner.util.config.ConfigParser """ if "optimizer" not in configs: raise Exception("Optimizer configurations are not found") optimizer_configs = configs["optimizer"] if optimizer_configs.get("weight_decay"): logger.debug("\x1b[31mSet weight decay\x1b[0m") optimizer.add_hook( optimizer_hooks.WeightDecay(optimizer_configs["weight_decay"]) ) if "gradient_clipping" in optimizer_configs: clipping_threshold = optimizer_configs["gradient_clipping"] msg = "Enable gradient clipping:" msg += f" threshold \x1b[31m{clipping_threshold}\x1b[0m" logger.debug(msg) optimizer.add_hook(optimizer_hooks.GradientClipping(clipping_threshold)) return optimizer
def add_hooks(optimizer, configs): """ :param optimizer: chainer.Optimizer, chainerのオプティマイザ :param configs: pyner.util.config.ConfigParser """ if 'optimizer' not in configs: raise Exception('Optimizer configurations are not found') optimizer_configs = configs['optimizer'] if optimizer_configs.get('weight_decay'): logger.debug('\x1b[31mSet weight decay\x1b[0m') optimizer.add_hook( optimizer_hooks.WeightDecay(optimizer_configs['weight_decay'])) if 'gradient_clipping' in optimizer_configs: clipping_threshold = optimizer_configs['gradient_clipping'] msg = 'Enable gradient clipping:' msg += f' threshold \x1b[31m{clipping_threshold}\x1b[0m' logger.debug(msg) optimizer.add_hook( optimizer_hooks.GradientClipping(clipping_threshold)) return optimizer
def main():
    """Train YOLOv3 on a custom dataset, optionally seeded from Darknet53.

    Parses CLI flags, builds the model/optimizer/trainer pipeline and
    runs training to ``--iteration`` iterations.
    """
    parser = argparse.ArgumentParser(description='Chainer YOLOv3 Train')
    parser.add_argument('--names')
    parser.add_argument('--train')
    parser.add_argument('--valid', default='')
    parser.add_argument('--detection', default='')
    parser.add_argument('--batchsize', '-b', type=int, default=8)
    parser.add_argument('--iteration', '-i', type=int, default=50200)
    parser.add_argument('--gpus', '-g', type=int, nargs='*', default=[])
    parser.add_argument('--out', '-o', default='yolov3-result')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--display_interval', type=int, default=100)
    parser.add_argument('--snapshot_interval', type=int, default=100)
    parser.add_argument('--ignore_thresh', type=float, default=0.5)
    parser.add_argument('--thresh', type=float, default=0.5)
    parser.add_argument('--darknet', default='')
    parser.add_argument('--darknet_class', type=int, default=-1)
    # Negative steps are interpreted below as offsets from the final iteration.
    parser.add_argument('--steps', type=int, nargs='*',
                        default=[-10200, -5200])
    parser.add_argument('--scales', type=float, nargs='*',
                        default=[0.1, 0.1])
    args = parser.parse_args()

    print('GPUs: {}'.format(args.gpus))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# iteration: {}'.format(args.iteration))

    class_names = load_list(args.names)

    # Seed Python and NumPy RNGs for reproducibility (CuPy is seeded later,
    # only when GPUs are requested).
    random.seed(args.seed)
    np.random.seed(args.seed)

    # Optionally initialize the backbone from a pretrained Darknet53 snapshot.
    base = None
    if len(args.darknet) > 0:
        # If --darknet_class is not given (<= 0), fall back to the number
        # of detection classes.
        darknet_class = args.darknet_class if args.darknet_class > 0 \
            else len(class_names)
        darknet53 = Darknet53(darknet_class)
        serializers.load_npz(args.darknet, darknet53)
        base = darknet53.base

    yolov3 = YOLOv3(len(class_names), base, ignore_thresh=args.ignore_thresh)
    model = YOLOv3Loss(yolov3)
    device = -1
    if len(args.gpus) > 0:
        device = args.gpus[0]
        cuda.cupy.random.seed(args.seed)
        cuda.get_device_from_id(args.gpus[0]).use()
    # With exactly one GPU the model is moved here; the multi-GPU case is
    # handled by ParallelUpdater below.
    if len(args.gpus) == 1:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(optimizer_hooks.WeightDecay(0.0005), 'hook_decay')
    optimizer.add_hook(optimizer_hooks.GradientClipping(10.0),
                       'hook_grad_clip')

    train = YOLODataset(args.train, train=True, classifier=False,
                        jitter=0.3, hue=0.1, sat=1.5, val=1.5)
    #train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    # shared_mem appears sized for a 448x448x3 float image plus up to 100
    # boxes of (1 label + 4 coords), 4 bytes per element — TODO confirm
    # against YOLODataset's sample layout.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, shared_mem=(448**2*3+(1+4)*100)*4)
    if len(args.gpus) <= 1:
        updater = training.StandardUpdater(
            train_iter, optimizer, converter=concat_yolo, device=device)
    else:
        devices = {'main': args.gpus[0]}
        for gpu in args.gpus[1:]:
            devices['gpu{}'.format(gpu)] = gpu
        updater = training.ParallelUpdater(
            train_iter, optimizer, converter=concat_yolo, devices=devices)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    display_interval = (args.display_interval, 'iteration')
    snapshot_interval = (args.snapshot_interval, 'iteration')

    # Report only training loss by default; when a validation set is given,
    # also evaluate and track validation loss for snapshots/plots.
    print_entries = ['epoch', 'iteration', 'main/loss', 'elapsed_time']
    plot_keys = ['main/loss']
    snapshot_key = 'main/loss'
    if len(args.valid) > 0:
        print_entries = ['epoch', 'iteration', 'main/loss',
                         'validation/main/loss', 'elapsed_time']
        plot_keys = ['main/loss', 'validation/main/loss']
        snapshot_key = 'validation/main/loss'
        test = YOLODataset(args.valid, train=False, classifier=False)
        test_iter = chainer.iterators.SerialIterator(
            test, args.batchsize, repeat=False, shuffle=False)
        trainer.extend(extensions.Evaluator(
            test_iter, model, converter=concat_yolo, device=device),
            trigger=display_interval)

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport(trigger=display_interval))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                plot_keys, 'iteration', display_interval,
                file_name='loss.png'))
    trainer.extend(extensions.PrintReport(print_entries),
                   trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=1))

    # Three snapshot flavors: best-so-far (min loss), periodic backup,
    # and the final model at the last iteration.
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_snapshot.npz'),
        trigger=training.triggers.MinValueTrigger(
            snapshot_key, snapshot_interval))
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_backup.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_final.npz'),
        trigger=(args.iteration, 'iteration'))

    # Resolve negative step values as offsets from the total iteration count.
    steps = args.steps
    for i in range(len(steps)):
        if steps[i] < 0:
            steps[i] = args.iteration + steps[i]
    scales = args.scales
    print('# steps: {}'.format(steps))
    print('# scales: {}'.format(scales))

    trainer.extend(DarknetShift(
        optimizer, 'steps', args.iteration, burn_in=1000,
        steps=steps, scales=scales
    ))
    # Multi-scale training: crop sizes 320..448 in steps of 32, fixed for
    # the final 200 iterations.
    trainer.extend(CropSizeUpdater(train,
                                   [(10+i)*32 for i in range(0, 5)],
                                   args.iteration - 200))

    # Optional periodic detection on a list of images, for visual checks.
    if len(args.detection):
        detector = YOLOv3Predictor(yolov3, thresh=args.thresh)
        trainer.extend(YOLODetection(
            detector, load_list(args.detection),
            class_names, (416, 416), args.thresh,
            trigger=display_interval, device=device
        ))

    print('')
    print('RUN')
    print('')

    trainer.run()
def main():
    """Train a ResNet50 classifier on the cats dataset.

    Chooses the normalization layer via ``--normalization``, trains for a
    fixed number of epochs with stepped learning-rate decay, and saves the
    final model to ``<out>/model.npz``.
    """
    import multiprocessing
    # forkserver avoids fork-related issues with the multiprocess iterators
    # started below — presumably chosen for CUDA safety; confirm if changed.
    multiprocessing.set_start_method('forkserver')
    parser = argparse.ArgumentParser(description='Cats training.')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--normalization', type=str,
                        choices=NORMALIZATIONS, required=True,
                        help='Normalization method')
    args = parser.parse_args()
    gpu = args.gpu
    out_dir = args.out

    # Fixed training hyperparameters.
    image_dir = 'images'
    batch_size = 32
    short_edge = 256    # resize so the short side is 256 px
    crop_edge = 224     # then crop to 224x224
    seed = 3141592653
    # Use as many worker processes as the CPU affinity mask allows.
    n_processes = len(os.sched_getaffinity(0))
    normalization = get_normalization(args.normalization)
    initial_lr = 0.1
    epochs = 300
    lr_reduce_interval = (100, 'epoch')
    lr_reduce_rate = 0.1
    weight_decay = 5e-4

    # Dedicated RNGs so the dataset split and shuffling are reproducible.
    numpy_random = numpy.random.RandomState(seed)
    random = Random.from_numpy_random(numpy_random)
    train_dataset, valid_dataset, _ = CatsDataset.train_valid(
        image_dir, short_edge, crop_edge, random)
    order_sampler = iterators.ShuffleOrderSampler(numpy_random)
    train_iter = iterators.MultiprocessIterator(
        train_dataset, batch_size, repeat=True, shuffle=None,
        n_processes=n_processes, n_prefetch=4, order_sampler=order_sampler)
    valid_iter = iterators.MultiprocessIterator(
        valid_dataset, batch_size, repeat=False, shuffle=False,
        n_processes=n_processes, n_prefetch=4)

    # Seed the global NumPy RNG as well (e.g. for weight initialization).
    numpy.random.seed(seed)
    model = ResNet50(len(CatsDataset.classes), normalization)
    model = chainer.links.Classifier(model)
    if gpu >= 0:
        chainer.cuda.get_device_from_id(gpu).use()
        model.to_gpu()

    optimizer = optimizers.MomentumSGD(lr=initial_lr)
    optimizer.setup(model)
    optimizer.add_hook(optimizer_hooks.WeightDecay(weight_decay))
    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=gpu)
    trainer = training.Trainer(updater, (epochs, 'epoch'), out=out_dir)
    # Multiply lr by 0.1 every 100 epochs.
    trainer.extend(extensions.ExponentialShift('lr', lr_reduce_rate),
                   trigger=lr_reduce_interval)
    trainer.extend(extensions.Evaluator(valid_iter, model,
                                        device=gpu), trigger=(1, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.run()

    chainer.serializers.save_npz(os.path.join(out_dir, 'model.npz'), model)
def main():
    """Train YOLOv3 on the 20-class PASCAL VOC detection dataset.

    Parses CLI flags, optionally loads a pretrained Darknet53 backbone,
    and runs training with periodic evaluation, snapshots and a detection
    demo on a sample image.
    """
    parser = argparse.ArgumentParser(description='Chainer YOLOv3 VOC Train')
    parser.add_argument('--batchsize', '-b', type=int, default=8)
    parser.add_argument('--iteration', '-i', type=int, default=50200)
    parser.add_argument('--gpus', '-g', type=int, nargs='*', default=[])
    parser.add_argument('--out', '-o', default='yolov3-voc-result')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--display_interval', type=int, default=100)
    parser.add_argument('--snapshot_interval', type=int, default=100)
    parser.add_argument('--ignore_thresh', type=float, default=0.5)
    parser.add_argument('--thresh', type=float, default=0.4)
    parser.add_argument('--darknet', default='')
    parser.add_argument('--validation_size', type=int, default=32)
    args = parser.parse_args()

    print('GPUs: {}'.format(args.gpus))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# iteration: {}'.format(args.iteration))
    print('')

    # Seed Python and NumPy RNGs (CuPy is seeded below when GPUs are used).
    random.seed(args.seed)
    np.random.seed(args.seed)

    # Optionally initialize the backbone from a pretrained Darknet53
    # snapshot (20 classes, matching VOC).
    base = None
    if len(args.darknet) > 0:
        darknet53 = Darknet53(20)
        serializers.load_npz(args.darknet, darknet53)
        base = darknet53.base

    yolov3 = YOLOv3(20, base, ignore_thresh=args.ignore_thresh)
    model = YOLOv3Loss(yolov3)
    device = -1
    if len(args.gpus) > 0:
        device = args.gpus[0]
        cuda.cupy.random.seed(args.seed)
        cuda.get_device_from_id(args.gpus[0]).use()
    # Single-GPU: move the model now; multi-GPU uses ParallelUpdater below.
    if len(args.gpus) == 1:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(optimizer_hooks.WeightDecay(0.0005), 'hook_decay')
    optimizer.add_hook(optimizer_hooks.GradientClipping(10.0),
                       'hook_grad_clip')

    train = VOCBboxDataset(split='train')
    test = VOCBboxDataset(split='val')
    train = YOLOVOCDataset(train, classifier=False,
                           jitter=0.3, hue=0.1, sat=1.5, val=1.5)
    #train = train[np.arange(args.batchsize)]
    test = YOLOVOCDataset(test, classifier=False)
    # Evaluate on a random subset of at most --validation_size samples.
    test = test[np.random.permutation(
        np.arange(len(test)))[:min(args.validation_size, len(test))]]

    train_iter = chainer.iterators.SerialIterator(train,
                                                  args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)
    if len(args.gpus) <= 1:
        updater = training.StandardUpdater(
            train_iter, optimizer, converter=concat_yolo, device=device)
    else:
        devices = {'main': args.gpus[0]}
        for gpu in args.gpus[1:]:
            devices['gpu{}'.format(gpu)] = gpu
        updater = training.ParallelUpdater(
            train_iter, optimizer, converter=concat_yolo, devices=devices)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    display_interval = (args.display_interval, 'iteration')
    snapshot_interval = (args.snapshot_interval, 'iteration')

    trainer.extend(extensions.Evaluator(
        test_iter, model, converter=concat_yolo, device=device),
        trigger=display_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport(trigger=display_interval))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'validation/main/loss'],
                'iteration', display_interval, file_name='loss.png'))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss',
         'validation/main/loss', 'elapsed_time']),
        trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=1))

    # Best-so-far snapshot (min validation loss) plus a periodic snapshot.
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_snapshot.npz'),
        trigger=training.triggers.MinValueTrigger(
            'validation/main/loss', snapshot_interval))
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_final.npz'),
        trigger=snapshot_interval)

    # Learning-rate schedule: steps at (iteration - 10200) and
    # (iteration - 5200), each scaling lr by 0.1, after a 1000-iter burn-in.
    trainer.extend(DarknetShift(
        optimizer, 'steps', args.iteration, burn_in=1000,
        steps=[args.iteration-10200, args.iteration-5200],
        scales=[0.1, 0.1]
    ))
    # Multi-scale training: crop sizes 320..448 in steps of 32, fixed for
    # the final 200 iterations.
    trainer.extend(CropSizeUpdater(train,
                                   [(10+i)*32 for i in range(0, 5)],
                                   args.iteration - 200))

    # Periodic detection demo on a fixed sample image.
    detector = YOLOv3Predictor(yolov3, thresh=args.thresh)
    class_names = load_list('./data/voc.names')
    trainer.extend(YOLODetection(
        detector, ['./data/image/dog.jpg'],
        class_names, size=(416, 416), thresh=args.thresh,
        trigger=display_interval, device=device
    ))

    trainer.run()