def __init__(self):
    super(Actor, self).__init__(
        h1=L.Linear(O_DIM, 400),
        h2=L.Linear(400, 300),
        h3=L.Linear(300, A_DIM,
                    initialW=initializers.Uniform(scale=0.003)),
    )
    self.optimizer = optimizers.Adam(alpha=ACTOR_LEARNING_RATE)
    self.optimizer.setup(self)
    self.optimizer.add_hook(optimizer_hooks.GradientClipping(2.0))
def __init__(self):
    super(Critic, self).__init__(
        h1=L.Linear(O_DIM, 400),
        h2_s=L.Linear(400, 300, nobias=True),
        h2_a=L.Linear(A_DIM, 300),
        h3=L.Linear(300, 1,
                    initialW=initializers.Uniform(scale=0.003)),
    )
    self.optimizer = optimizers.Adam(alpha=CRITIC_LEARNING_RATE)
    self.optimizer.setup(self)
    # self.optimizer.add_hook(optimizer_hooks.WeightDecay(CRITIC_WEIGHT_DECAY))
    self.optimizer.add_hook(optimizer_hooks.GradientClipping(2.0))
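The Actor and Critic constructors above reference module-level imports and constants that are not shown. A minimal sketch of what they assume; the dimensions and learning rates below are illustrative placeholders, not the original project's values:

# Illustrative module header for the Actor/Critic snippets above.
# All concrete values are placeholders labeled as such.
import chainer
from chainer import initializers, optimizer_hooks, optimizers
import chainer.links as L

O_DIM = 3                    # observation dimensionality (placeholder)
A_DIM = 1                    # action dimensionality (placeholder)
ACTOR_LEARNING_RATE = 1e-4   # placeholder
CRITIC_LEARNING_RATE = 1e-3  # placeholder
CRITIC_WEIGHT_DECAY = 1e-2   # placeholder, used by the commented-out hook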
def get_trainer_and_reporter(
        trial: Trial,
        model: CbLossClassifier,
        iter_test: iterators.SerialIterator,
        iter_train: iterators.SerialIterator,
        batch_converter,
        args,
        device=0,
        best_params={}):
    if best_params != {}:  # reuse previously found best_params if given
        learning_rate = best_params['learning_rate']
    else:
        learning_rate = trial.suggest_loguniform('learning_rate', 1e-5, 1e-1)
    grad_clipping = trial.suggest_uniform('grad_clipping', 0, 1.0)

    optimizer = optimizers.SGD(lr=learning_rate)
    optimizer.setup(model)
    optimizer.add_hook(optimizer_hooks.GradientClipping(threshold=grad_clipping))

    updater = training.StandardUpdater(
        iter_train,
        optimizer,
        device=device,
        converter=batch_converter
    )
    early_trigger = training.triggers.EarlyStoppingTrigger(
        check_trigger=(1, "epoch"),
        monitor="validation/main/accuracy",
        patients=3,
        mode="max",
        max_trigger=(args.epoch, "epoch")
    )
    trainer = training.Trainer(updater, early_trigger, out='optuna')
    trainer.extend(extensions.Evaluator(
        iter_test, model, device=device, converter=batch_converter))

    snapshot_writer = training.extensions.snapshot_writers.ThreadQueueWriter()
    trainer.extend(training.extensions.snapshot_object(
        target=model,
        filename='model_{}.npz'.format(args.desc),
        writer=snapshot_writer), trigger=(10, 'epoch'))

    reporter = extensions.LogReport()
    trainer.extend(reporter)
    trainer.extend(integration.ChainerPruningExtension(
        trial, args.pruning_key, (args.pruning_trigger_epoch, 'epoch')))

    iter_test.reset()
    iter_train.reset()
    return trainer, reporter
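A hedged sketch of how this helper could sit inside an Optuna objective; the model, iterators, converter, and args are assumed to be constructed elsewhere, and the monitored key matches the EarlyStoppingTrigger above:

import optuna

def objective(trial):
    # model, iter_train, iter_test, batch_converter, and args are assumed
    # to be built elsewhere in the project.
    trainer, reporter = get_trainer_and_reporter(
        trial, model, iter_test, iter_train, batch_converter, args)
    trainer.run()
    # LogReport keeps its observations in `log`; use the last epoch's
    # validation accuracy as the objective value.
    return reporter.log[-1]['validation/main/accuracy']

study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)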
def __init__(self, state_dim, action_num, lr=1.0 * 1e-3,
             batch_size=100, device=-1, shared_model=False,
             clip_grads=False, use_batch_norm=True, double_q=False):
    super(NAF, self).__init__()
    self._q_optimizer = optimizers.Adam(alpha=lr)
    self._batch_size = batch_size
    self._double_q = double_q

    if shared_model:
        self._q = NafSharedQFunction(
            state_dim=state_dim, action_num=action_num,
            use_batch_norm=use_batch_norm)
        self._target_q = NafSharedQFunction(
            state_dim=state_dim, action_num=action_num,
            use_batch_norm=use_batch_norm)
    else:
        self._q = NafQFunction(
            state_dim=state_dim, action_num=action_num,
            use_batch_norm=use_batch_norm)
        self._target_q = NafQFunction(
            state_dim=state_dim, action_num=action_num,
            use_batch_norm=use_batch_norm)

    if device >= 0:
        self._q.to_gpu()
        self._target_q.to_gpu()

    self._q_optimizer.setup(self._q)
    if clip_grads:
        self._q_optimizer.add_hook(optimizer_hooks.GradientClipping(1.0))

    mean = np.zeros(shape=(action_num,), dtype=np.float32)
    sigma = np.ones(shape=(action_num,), dtype=np.float32)
    self._exploration_noise = Normal(loc=mean, scale=sigma * 0.1)

    self._device = device
    self._initialized = False
    self._action_num = action_num
def check_clipping(self, multiplier):
    w = self.target.param.data
    g = self.target.param.grad
    xp = backend.get_array_module(w)
    norm = xp.sqrt(gradient_clipping._sum_sqnorm(g))
    threshold = norm * multiplier
    if multiplier < 1:
        expect = w - g * multiplier
    else:
        expect = w - g

    opt = optimizers.SGD(lr=1)
    opt.setup(self.target)
    opt.add_hook(optimizer_hooks.GradientClipping(threshold))
    opt.update()

    testing.assert_allclose(expect, w)
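For reference, the rule this test exercises: GradientClipping rescales every gradient array by threshold / ||g|| whenever the global L2 norm ||g|| exceeds the threshold, and leaves gradients untouched otherwise, so with SGD(lr=1) the update is w - g * min(1, threshold / ||g||). A minimal NumPy sketch of that rule, not Chainer's implementation:

import numpy as np

def clip_by_global_norm(grads, threshold):
    # Global L2 norm over every gradient array in the model.
    norm = np.sqrt(sum(float((g * g).sum()) for g in grads))
    rate = threshold / norm
    if rate < 1:  # only rescale when the norm exceeds the threshold
        return [g * rate for g in grads]
    return grads

# With threshold = norm * multiplier, the scale factor is exactly
# min(1, multiplier), matching the `expect` computed in the test above.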
def create_optimizer(model):
    cp: Dict[str, Any] = copy(config.train.optimizer)
    n = cp.pop("name").lower()
    if n == "adam":
        optimizer = optimizers.Adam(**cp)
    elif n == "sgd":
        optimizer = optimizers.SGD(**cp)
    else:
        raise ValueError(n)

    optimizer.setup(model)

    if config.train.optimizer_gradient_clipping is not None:
        optimizer.add_hook(
            optimizer_hooks.GradientClipping(
                config.train.optimizer_gradient_clipping))

    return optimizer
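A hypothetical usage sketch; the real `config` object is project-specific, so a SimpleNamespace stands in for it here and the field values are purely illustrative:

from copy import copy
from types import SimpleNamespace
from typing import Any, Dict

import chainer.links as L
from chainer import optimizer_hooks, optimizers

# Stand-in for the project's config; shape inferred from the attributes
# create_optimizer reads, values illustrative only.
config = SimpleNamespace(train=SimpleNamespace(
    optimizer={"name": "Adam", "alpha": 1e-3},
    optimizer_gradient_clipping=5.0,
))

model = L.Linear(10, 2)
optimizer = create_optimizer(model)  # Adam with GradientClipping(5.0) attached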
def check_clipping(self, backend_configs, rate):
    target = self.target
    norm = self.norm
    assert len(backend_configs) == len(list(target.params()))
    devices = [bc.device for bc in backend_configs]
    threshold = norm * rate

    expects = []
    for param, device in zip(target.params(), devices):
        expects.append(param.array - param.grad * min(1, rate))
        param.to_device(device)

    opt = optimizers.SGD(lr=1)
    opt.setup(target)
    opt.add_hook(optimizer_hooks.GradientClipping(threshold))
    opt.update()

    for expect, param in zip(expects, target.params()):
        testing.assert_allclose(expect, param.array)
def add_hooks(optimizer, configs):
    """
    :param optimizer: chainer.Optimizer, the Chainer optimizer to configure
    :param configs: pyner.util.config.ConfigParser
    """
    if "optimizer" not in configs:
        raise Exception("Optimizer configurations are not found")

    optimizer_configs = configs["optimizer"]

    if optimizer_configs.get("weight_decay"):
        logger.debug("\x1b[31mSet weight decay\x1b[0m")
        optimizer.add_hook(
            optimizer_hooks.WeightDecay(optimizer_configs["weight_decay"])
        )

    if "gradient_clipping" in optimizer_configs:
        clipping_threshold = optimizer_configs["gradient_clipping"]
        msg = "Enable gradient clipping:"
        msg += f" threshold \x1b[31m{clipping_threshold}\x1b[0m"
        logger.debug(msg)
        optimizer.add_hook(optimizer_hooks.GradientClipping(clipping_threshold))

    return optimizer
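A hypothetical call sketch; a plain dict stands in for pyner.util.config.ConfigParser on the assumption that it supports `in` checks, `.get`, and item access:

import chainer.links as L
from chainer import optimizers

model = L.Linear(100, 10)  # any chainer.Link works here
optimizer = optimizers.SGD(lr=0.01)
optimizer.setup(model)

configs = {"optimizer": {"weight_decay": 1e-4, "gradient_clipping": 5.0}}
optimizer = add_hooks(optimizer, configs)  # attaches WeightDecay and GradientClipping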
def main():
    parser = argparse.ArgumentParser(description='Chainer YOLOv3 Train')
    parser.add_argument('--names')
    parser.add_argument('--train')
    parser.add_argument('--valid', default='')
    parser.add_argument('--detection', default='')
    parser.add_argument('--batchsize', '-b', type=int, default=8)
    parser.add_argument('--iteration', '-i', type=int, default=50200)
    parser.add_argument('--gpus', '-g', type=int, nargs='*', default=[])
    parser.add_argument('--out', '-o', default='yolov3-result')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--display_interval', type=int, default=100)
    parser.add_argument('--snapshot_interval', type=int, default=100)
    parser.add_argument('--ignore_thresh', type=float, default=0.5)
    parser.add_argument('--thresh', type=float, default=0.5)
    parser.add_argument('--darknet', default='')
    parser.add_argument('--darknet_class', type=int, default=-1)
    parser.add_argument('--steps', type=int, nargs='*', default=[-10200, -5200])
    parser.add_argument('--scales', type=float, nargs='*', default=[0.1, 0.1])
    args = parser.parse_args()

    print('GPUs: {}'.format(args.gpus))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# iteration: {}'.format(args.iteration))

    class_names = load_list(args.names)
    random.seed(args.seed)
    np.random.seed(args.seed)

    base = None
    if len(args.darknet) > 0:
        darknet_class = args.darknet_class if args.darknet_class > 0 else len(class_names)
        darknet53 = Darknet53(darknet_class)
        serializers.load_npz(args.darknet, darknet53)
        base = darknet53.base

    yolov3 = YOLOv3(len(class_names), base, ignore_thresh=args.ignore_thresh)
    model = YOLOv3Loss(yolov3)

    device = -1
    if len(args.gpus) > 0:
        device = args.gpus[0]
        cuda.cupy.random.seed(args.seed)
        cuda.get_device_from_id(args.gpus[0]).use()
    if len(args.gpus) == 1:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(optimizer_hooks.WeightDecay(0.0005), 'hook_decay')
    optimizer.add_hook(optimizer_hooks.GradientClipping(10.0), 'hook_grad_clip')

    train = YOLODataset(args.train, train=True, classifier=False,
                        jitter=0.3, hue=0.1, sat=1.5, val=1.5)
    # train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize,
        shared_mem=(448 ** 2 * 3 + (1 + 4) * 100) * 4)

    if len(args.gpus) <= 1:
        updater = training.StandardUpdater(
            train_iter, optimizer, converter=concat_yolo, device=device)
    else:
        devices = {'main': args.gpus[0]}
        for gpu in args.gpus[1:]:
            devices['gpu{}'.format(gpu)] = gpu
        updater = training.ParallelUpdater(
            train_iter, optimizer, converter=concat_yolo, devices=devices)

    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    display_interval = (args.display_interval, 'iteration')
    snapshot_interval = (args.snapshot_interval, 'iteration')
    print_entries = ['epoch', 'iteration', 'main/loss', 'elapsed_time']
    plot_keys = ['main/loss']
    snapshot_key = 'main/loss'
    if len(args.valid) > 0:
        print_entries = ['epoch', 'iteration', 'main/loss',
                         'validation/main/loss', 'elapsed_time']
        plot_keys = ['main/loss', 'validation/main/loss']
        snapshot_key = 'validation/main/loss'
        test = YOLODataset(args.valid, train=False, classifier=False)
        test_iter = chainer.iterators.SerialIterator(
            test, args.batchsize, repeat=False, shuffle=False)
        trainer.extend(extensions.Evaluator(
            test_iter, model, converter=concat_yolo, device=device),
            trigger=display_interval)

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport(trigger=display_interval))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                plot_keys, 'iteration',
                display_interval, file_name='loss.png'))
    trainer.extend(extensions.PrintReport(print_entries),
                   trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=1))

    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_snapshot.npz'),
        trigger=training.triggers.MinValueTrigger(
            snapshot_key, snapshot_interval))
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_backup.npz'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_final.npz'),
        trigger=(args.iteration, 'iteration'))

    steps = args.steps
    for i in range(len(steps)):
        if steps[i] < 0:
            steps[i] = args.iteration + steps[i]
    scales = args.scales
    print('# steps: {}'.format(steps))
    print('# scales: {}'.format(scales))

    trainer.extend(DarknetShift(
        optimizer, 'steps', args.iteration,
        burn_in=1000, steps=steps, scales=scales))
    trainer.extend(CropSizeUpdater(
        train, [(10 + i) * 32 for i in range(0, 5)],
        args.iteration - 200))

    if len(args.detection):
        detector = YOLOv3Predictor(yolov3, thresh=args.thresh)
        trainer.extend(YOLODetection(
            detector, load_list(args.detection),
            class_names, (416, 416), args.thresh,
            trigger=display_interval, device=device))

    print('')
    print('RUN')
    print('')
    trainer.run()
def main():
    parser = argparse.ArgumentParser(description='Chainer YOLOv3 VOC Train')
    parser.add_argument('--batchsize', '-b', type=int, default=8)
    parser.add_argument('--iteration', '-i', type=int, default=50200)
    parser.add_argument('--gpus', '-g', type=int, nargs='*', default=[])
    parser.add_argument('--out', '-o', default='yolov3-voc-result')
    parser.add_argument('--seed', type=int, default=0)
    parser.add_argument('--display_interval', type=int, default=100)
    parser.add_argument('--snapshot_interval', type=int, default=100)
    parser.add_argument('--ignore_thresh', type=float, default=0.5)
    parser.add_argument('--thresh', type=float, default=0.4)
    parser.add_argument('--darknet', default='')
    parser.add_argument('--validation_size', type=int, default=32)
    args = parser.parse_args()

    print('GPUs: {}'.format(args.gpus))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# iteration: {}'.format(args.iteration))
    print('')

    random.seed(args.seed)
    np.random.seed(args.seed)

    base = None
    if len(args.darknet) > 0:
        darknet53 = Darknet53(20)
        serializers.load_npz(args.darknet, darknet53)
        base = darknet53.base

    yolov3 = YOLOv3(20, base, ignore_thresh=args.ignore_thresh)
    model = YOLOv3Loss(yolov3)

    device = -1
    if len(args.gpus) > 0:
        device = args.gpus[0]
        cuda.cupy.random.seed(args.seed)
        cuda.get_device_from_id(args.gpus[0]).use()
    if len(args.gpus) == 1:
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=0.001)
    optimizer.setup(model)
    optimizer.add_hook(optimizer_hooks.WeightDecay(0.0005), 'hook_decay')
    optimizer.add_hook(optimizer_hooks.GradientClipping(10.0), 'hook_grad_clip')

    train = VOCBboxDataset(split='train')
    test = VOCBboxDataset(split='val')
    train = YOLOVOCDataset(train, classifier=False, jitter=0.3,
                           hue=0.1, sat=1.5, val=1.5)
    # train = train[np.arange(args.batchsize)]
    test = YOLOVOCDataset(test, classifier=False)
    test = test[np.random.permutation(np.arange(len(test)))[
        :min(args.validation_size, len(test))]]

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    if len(args.gpus) <= 1:
        updater = training.StandardUpdater(
            train_iter, optimizer, converter=concat_yolo, device=device)
    else:
        devices = {'main': args.gpus[0]}
        for gpu in args.gpus[1:]:
            devices['gpu{}'.format(gpu)] = gpu
        updater = training.ParallelUpdater(
            train_iter, optimizer, converter=concat_yolo, devices=devices)

    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), out=args.out)

    display_interval = (args.display_interval, 'iteration')
    snapshot_interval = (args.snapshot_interval, 'iteration')

    trainer.extend(extensions.Evaluator(
        test_iter, model, converter=concat_yolo, device=device),
        trigger=display_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.LogReport(trigger=display_interval))
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'validation/main/loss'], 'iteration',
                display_interval, file_name='loss.png'))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss',
         'validation/main/loss', 'elapsed_time']),
        trigger=display_interval)
    trainer.extend(extensions.ProgressBar(update_interval=1))

    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_snapshot.npz'),
        trigger=training.triggers.MinValueTrigger(
            'validation/main/loss', snapshot_interval))
    trainer.extend(extensions.snapshot_object(
        yolov3, 'yolov3_final.npz'),
        trigger=snapshot_interval)

    trainer.extend(DarknetShift(
        optimizer, 'steps', args.iteration,
        burn_in=1000,
        steps=[args.iteration - 10200, args.iteration - 5200],
        scales=[0.1, 0.1]))
    trainer.extend(CropSizeUpdater(
        train, [(10 + i) * 32 for i in range(0, 5)],
        args.iteration - 200))

    detector = YOLOv3Predictor(yolov3, thresh=args.thresh)
    class_names = load_list('./data/voc.names')
    trainer.extend(YOLODetection(
        detector, ['./data/image/dog.jpg'],
        class_names, size=(416, 416), thresh=args.thresh,
        trigger=display_interval, device=device))

    trainer.run()
def main():
    # hyperparameters
    z_dim = 100
    dis_ln = 1
    grad_clip = 0.1
    adam_alpha = 1e-4
    adam_beta1 = 0.5
    adam_beta2 = 0.9
    # training options
    is_random = False  # whether conditional
    npz_interval = 100
    max_epoch = 200
    out_dir = 'result-dcgan'
    batch_size = 128
    device = 0
    gen_npz = None
    dis_npz = None
    # gen_npz = 'gen_snapshot_epoch-200.npz'
    # dis_npz = 'dis_snapshot_epoch-200.npz'

    train, _ = datasets.mnist.get_mnist(ndim=3)
    train_iter = iterators.SerialIterator(train, batch_size)

    gen = Generator(z_dim)
    gen.to_gpu(device=device)
    if gen_npz is not None:
        chainer.serializers.load_npz(out_dir + '/' + gen_npz, gen)
    gen_opt = optimizers.Adam(alpha=adam_alpha, beta1=adam_beta1, beta2=adam_beta2)
    gen_opt.setup(gen)
    gen_opt.add_hook(optimizer_hooks.GradientClipping(grad_clip))

    dis = Discriminator()
    dis.to_gpu(device=device)
    if dis_npz is not None:
        chainer.serializers.load_npz(out_dir + '/' + dis_npz, dis)
    dis_opt = optimizers.Adam(alpha=adam_alpha, beta1=adam_beta1, beta2=adam_beta2)
    dis_opt.setup(dis)
    dis_opt.add_hook(optimizer_hooks.GradientClipping(grad_clip))

    updater = GANUpdater(
        dis_ln=dis_ln,
        is_random=is_random,
        iterator=train_iter,
        optimizer={'gen': gen_opt, 'dis': dis_opt},
        device=device)
    trainer = training.Trainer(updater, (max_epoch, 'epoch'), out=out_dir)

    trainer.extend(ex.LogReport(log_name=None, trigger=(1, 'iteration')))
    trainer.extend(
        ex.PrintReport([
            'epoch', 'iteration', 'gen/loss', 'dis/loss',
            'dis/fake_prob', 'elapsed_time'
        ]))
    trainer.extend(
        ex.PlotReport(
            ['gen/loss', 'dis/loss'],
            x_key='epoch',
            file_name='loss.png',
            postprocess=lambda *args: gen.save_img(out_dir=out_dir + '/img')))
    trainer.extend(
        ex.PlotReport(['dis/fake_prob'],
                      x_key='epoch',
                      file_name='probability.png'))
    trainer.extend(ex.snapshot_object(
        gen, 'gen_snapshot_epoch-{.updater.epoch}.npz'),
        trigger=(npz_interval, 'epoch'))
    trainer.extend(ex.snapshot_object(
        dis, 'dis_snapshot_epoch-{.updater.epoch}.npz'),
        trigger=(npz_interval, 'epoch'))
    trainer.run()
def get_trainer_and_reporter(
        model: CbLossClassifier,
        df_test: pd.DataFrame,
        df_train: pd.DataFrame,
        batch_size,
        batch_converter,
        args,
        device,
        plot_list,
        print_list=[],
        learning_rate=1e-5,
        grad_clipping=0.5):
    iter_train = get_iter(df_train, batch_size, shuffle=True, repeat=True)
    iter_test = get_iter(df_test, batch_size, shuffle=False, repeat=False)

    optimizer = optimizers.SGD(lr=learning_rate)
    optimizer.setup(model)
    if grad_clipping is not None:  # enable clipping when a threshold is given
        optimizer.add_hook(
            optimizer_hooks.GradientClipping(threshold=grad_clipping))

    updater = training.StandardUpdater(
        iter_train,
        optimizer,
        device=device,
        converter=batch_converter
    )
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out='result')

    ext_evaluator = training.extensions.Evaluator(
        iter_test, model, device=device, converter=batch_converter)
    ext_snapshot_object = training.extensions.snapshot_object(
        target=model,
        filename='model_{}.npz'.format(args.desc),
        writer=training.extensions.snapshot_writers.ThreadQueueWriter())
    ext_reporter = training.extensions.LogReport()
    ext_plotreport_list = []
    for tag, plot in plot_list:
        ext_plotreport_list.append(
            training.extensions.PlotReport(
                plot,
                x_key='epoch',
                file_name='{}/{}_{}.png'.format(
                    args.result_directory, tag, args.desc))
        )
    ext_dump_graph = training.extensions.dump_graph('main/loss')

    trainer.extend(ext_evaluator)
    trainer.extend(ext_snapshot_object, trigger=(10, 'epoch'))
    trainer.extend(ext_reporter)
    for pl in ext_plotreport_list:
        trainer.extend(pl)
    trainer.extend(ext_dump_graph)
    if print_list != []:
        ext_printreport = training.extensions.PrintReport(print_list)
        trainer.extend(ext_printreport)

    iter_test.reset()
    iter_train.reset()
    return trainer, ext_reporter