def _get_hook(hook): if hook == 'WeightDecay': return WeightDecay(cfg.solver.weight_decay) elif hook == 'GradientClipping': return GradientClipping(cfg.solver.gradient_clipping_thresh) else: raise ValueError('Not support `hook`: {}.'.format(hook))
def create_trainer(train_iter, net, gpu_id, initial_lr, weight_decay,
                   freeze_layer, small_lr_layers, small_initial_lr,
                   num_epochs_or_iter, epoch_or_iter, save_dir):
    """Assemble a Chainer ``Trainer`` around a MomentumSGD optimizer.

    Moves *net* to the GPU when ``gpu_id >= 0``, optionally adds weight
    decay, freezes the layers named in *freeze_layer* (via the external
    ``freeze_setup`` helper), and gives the layers listed in
    *small_lr_layers* a reduced learning rate *small_initial_lr*.
    The trainer stops after ``(num_epochs_or_iter, epoch_or_iter)``
    and writes output under *save_dir*.
    """
    if gpu_id >= 0:
        net.to_gpu(gpu_id)

    sgd = optimizers.MomentumSGD(lr=initial_lr)
    sgd.setup(net)
    if weight_decay > 0:
        sgd.add_hook(WeightDecay(weight_decay))
    if freeze_layer:
        freeze_setup(net, sgd, freeze_layer)
    if small_lr_layers:
        # Override the per-parameter learning rate for the listed layers.
        for name in small_lr_layers:
            link = getattr(net.predictor, name)
            link.W.update_rule.hyperparam.lr = small_initial_lr
            link.b.update_rule.hyperparam.lr = small_initial_lr

    updater = training.StandardUpdater(train_iter, sgd, device=gpu_id)
    return training.Trainer(
        updater, (num_epochs_or_iter, epoch_or_iter), out=save_dir)
def adadelta(model, args):
    """Build an AdaDelta optimizer (rho/eps from *args*) with weight decay."""
    optimizer = chainer.optimizers.AdaDelta(rho=args.rho, eps=args.eps)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(args.weight_decay))
    return optimizer
def adam(model, args):
    """Build an Adam optimizer (alpha/beta1/beta2 from *args*) with weight decay."""
    optimizer = chainer.optimizers.Adam(
        alpha=args.lr, beta1=args.beta1, beta2=args.beta2)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(args.weight_decay))
    return optimizer
def from_args(target, args: argparse.Namespace):
    """Initialize an AdaDelta optimizer from argparse Namespace.

    Args:
        target: for pytorch `model.parameters()`, for chainer `model`
        args (argparse.Namespace): parsed command-line args
            (uses ``rho``, ``eps`` and ``weight_decay``)
    """
    optimizer = chainer.optimizers.AdaDelta(rho=args.rho, eps=args.eps)
    optimizer.setup(target)
    optimizer.add_hook(WeightDecay(args.weight_decay))
    return optimizer
def from_args(target, args: argparse.Namespace):
    """Initialize an Adam optimizer from argparse Namespace.

    Args:
        target: for pytorch `model.parameters()`, for chainer `model`
        args (argparse.Namespace): parsed command-line args
            (uses ``lr``, ``beta1``, ``beta2`` and ``weight_decay``)
    """
    optimizer = chainer.optimizers.Adam(
        alpha=args.lr, beta1=args.beta1, beta2=args.beta2)
    optimizer.setup(target)
    optimizer.add_hook(WeightDecay(args.weight_decay))
    return optimizer
def sgd(model, args):
    """Build a plain SGD optimizer (lr from *args*) with weight decay."""
    optimizer = chainer.optimizers.SGD(lr=args.lr)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(args.weight_decay))
    return optimizer
def main():
    """Train a FasterRCNN-FPN (ResNet50/101 backbone) on COCO with ChainerMN.

    Multi-node data-parallel training: each rank trains on a scattered
    slice of the dataset; logging/snapshotting happen on rank 0 only.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('resnet50', 'resnet101'))
    parser.add_argument('--batchsize', type=int, default=16)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    device = comm.intra_rank  # one GPU per process within a node

    # Build the detector and seed its backbone with ImageNet weights.
    if args.model == 'resnet50':
        model = FasterRCNNFPNResNet50(
            n_fg_class=len(coco_bbox_label_names), mean='chainercv')
        copyparams(model.extractor.base,
                   ResNet50(pretrained_model='imagenet', arch='he'))
    elif args.model == 'resnet101':
        model = FasterRCNNFPNResNet101(
            n_fg_class=len(coco_bbox_label_names), mean='chainercv')
        copyparams(model.extractor.base,
                   ResNet101(pretrained_model='imagenet', arch='he'))

    model.use_preset('evaluate')
    train_chain = TrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    train_chain.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(
            COCOBboxDataset(split='train'),
            COCOBboxDataset(split='valminusminival'),
        ),
        ('img', 'bbox', 'label'), transform)

    # Rank 0 owns the full index list; scatter_dataset splits it evenly.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]
    train_iter = chainer.iterators.MultithreadIterator(
        train, args.batchsize // comm.size)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    optimizer.add_hook(WeightDecay(0.0001))

    # Freeze the stem, the first residual stage, and all BN layers.
    model.extractor.base.conv1.disable_update()
    model.extractor.base.res2.disable_update()
    for link in model.links():
        if isinstance(link, L.BatchNormalization):
            link.disable_update()

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    # Iteration counts are scaled so total work matches a batchsize-16 run.
    trainer = training.Trainer(
        updater, (90000 * 16 / args.batchsize, 'iteration'), args.out)

    def lr_schedule(updater):
        # Linear warm-up for the first 500 iterations, then step decay
        # (x0.1, x0.01) at the 60k/80k marks of the batchsize-16 schedule.
        base_lr = 0.02 * args.batchsize / 16
        warm_up_duration = 500
        warm_up_rate = 1 / 3
        iteration = updater.iteration
        if iteration < warm_up_duration:
            rate = warm_up_rate \
                + (1 - warm_up_rate) * iteration / warm_up_duration
        elif iteration < 60000 * 16 / args.batchsize:
            rate = 1
        elif iteration < 80000 * 16 / args.batchsize:
            rate = 0.1
        else:
            rate = 0.01
        return base_lr * rate

    trainer.extend(ManualScheduler('lr', lr_schedule))

    # Reporting and snapshots only on the first rank.
    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(
            ['epoch', 'iteration', 'lr', 'main/loss', 'main/loss/rpn/loc',
             'main/loss/rpn/conf', 'main/loss/head/loc',
             'main/loss/head/conf']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        trainer.extend(
            extensions.snapshot_object(
                model, 'model_iter_{.updater.iteration}'),
            trigger=(90000 * 16 / args.batchsize, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer, strict=False)

    trainer.run()
def main():
    """Train SSD300/512 on the EPIC-Kitchens bbox dataset with ChainerMN.

    Fixed 18-epoch schedule with x0.1 LR drops at epochs 12 and 15;
    logging and per-epoch model snapshots happen on rank 0 only.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    device = comm.intra_rank

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(epic_kitchens_bbox_category_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(epic_kitchens_bbox_category_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train = EpicKitchensBboxDataset(year='2018', split='train')
    # Rank 0 owns the full index list; scatter_dataset splits it per rank.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    train = TransformDataset(train, ('img', 'mb_loc', 'mb_label'),
                             Transform(model.coder, model.insize, model.mean))
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # http://chainermn.readthedocs.io/en/latest/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=2)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    # Standard SSD recipe: double the gradient on biases, decay the rest.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (18, 'epoch'), args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr),
                   trigger=triggers.ManualScheduleTrigger([12, 15], 'epoch'))

    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr',
            'main/loss', 'main/loss/loc', 'main/loss/conf'
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=1))
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}.npz'),
            trigger=(1, 'epoch'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train an SSD-style detector on ISIC2018 Task 1 data (single GPU).

    Uses Adam, evaluates with DetectionVOCEvaluator every epoch, keeps
    the best-mAP snapshot during training, and always saves the final
    trainer/model state at the end.
    """
    args = parse_args()
    res = Resource(args, train=True)

    train, test, train_gt, test_gt = load_train_test(
        train_dir=const.PREPROCESSED_TRAIN_DIR, gt_dir=const.XML_DIR)
    res.log_info(f'Train: {len(train)}, test: {len(test)}')

    # ARCHS maps the --model name to a detector class (SSD-like interface).
    model = ARCHS[args.model](n_fg_class=len(const.LABELS),
                              pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train_dataset = TransformDataset(
        ISIC2018Task1Dataset(train, train_gt),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultithreadIterator(
        train_dataset, args.batchsize, n_threads=args.loaderjob)
    test_dataset = TransformDataset(
        ISIC2018Task1Dataset(test, test_gt),
        Transform(model.coder, model.insize, model.mean))
    test_iter = chainer.iterators.MultithreadIterator(
        test_dataset, args.batchsize, shuffle=False, repeat=False,
        n_threads=args.loaderjob)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(train_chain)
    # Standard SSD recipe: double the gradient on biases, decay the rest.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)
    trainer.extend(
        DetectionVOCEvaluator(test_iter, model, use_07_metric=False,
                              label_names=const.LABELS))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map'
        ]))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Snapshot whenever validation mAP reaches a new maximum.
    snapshot_trigger = triggers.MaxValueTrigger(key='validation/main/map')
    snapshot_object_trigger = triggers.MaxValueTrigger(
        key='validation/main/map')
    trainer.extend(extensions.snapshot(filename='snapshot_best.npz'),
                   trigger=snapshot_trigger)
    trainer.extend(extensions.snapshot_object(model, 'model_best.npz'),
                   trigger=snapshot_object_trigger)

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # save last model
    chainer.serializers.save_npz(
        os.path.join(args.out, 'snapshot_last.npz'), trainer)
    chainer.serializers.save_npz(
        os.path.join(args.out, 'model_last.npz'), model)
h = F.relu(self.fc1(x)) h = F.relu(self.fc2(h)) h = self.fc3(h) return h net = MLP() #インスタンス化 from chainer import optimizers from chainer.optimizer_hooks import WeightDecay # 最適化手法の選択 optimizer = optimizers.MomentumSGD(lr=0.001, momentum=0.9) # 学習率を 0.001 に設定 optimizer.setup(net) for param in net.params(): if param.name != 'b': # バイアス以外だったら param.update_rule.add_hook(WeightDecay(0.0001)) # 重み減衰を適用 # ------------------------------------------------------------------------------ # 学習における設定 # エポック数(↓変更可能) n_epoch = 401 # 表示するログの設定 results_train, results_valid = {}, {} results_train['loss'], results_train['accuracy'] = [], [] results_valid['loss'], results_valid['accuracy'] = [], [] count = 1 train_batch = train_iter.next() x_train, t_train = chainer.dataset.concat_examples(train_batch)
def do():
    """Short SSD300 training run (CPU by default) over project datasets.

    NOTE(review): the GPU-transfer section and the evaluator/report
    extensions are disabled — left as triple-quoted strings and `#`
    comments below, preserved as-is.  `train_dataset`, `test_dataset`
    and `ssdd` come from module scope.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', choices=('ssd300', 'ssd512'), default='ssd300')
    parser.add_argument('--batchsize', type=int, default=8)
    parser.add_argument('--iteration', type=int, default=64)
    parser.add_argument('--step', type=int, nargs='*', default=[8, 16])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # Only SSD300 is actually constructed, regardless of --model.
    model = SSD300(
        n_fg_class=len(ssdd.labels),
        pretrained_model='imagenet'
    )
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    """
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    """
    train = TransformDataset(
        train_dataset,
        Transform(model.coder, model.insize, model.mean),
    )
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test = test_dataset
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # Standard SSD recipe: double the gradient on biases, decay the rest.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(
        updater, (args.iteration, 'iteration'), args.out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-3),
        trigger=triggers.ManualScheduleTrigger(args.step, 'iteration')
    )
    """
    trainer.extend(
        extensions.Evaluator(
            test_iter, model
        ),
        trigger=triggers.ManualScheduleTrigger(
            args.step + [args.iteration], 'iteration'
        )
    )
    """
    trainer.extend(extensions.ProgressBar(update_interval=1))
    #trainer.extend(extensions.LogReport(trigger=1))
    #trainer.extend(extensions.observe_lr(), trigger=1)
    #trainer.extend(extensions.PrintReport(
    #    ['epoch', 'iteration', 'lr',
    #     'main/loss', 'main/loss/loc', 'main/loss/conf',
    #     'validation/main/map', 'elapsed_time']),
    #    trigger=1)
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'main/loss/loc', 'main/loss/conf'],
                'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['validation/main/map'], 'epoch', file_name='accuracy.png'))
    # NOTE(review): filename says "iter" but formats updater.epoch.
    trainer.extend(extensions.snapshot(
        filename='snapshot_iter_{.updater.epoch}.npz'),
        trigger=(4, 'iteration')
    )
    trainer.run()
def handler(context):
    """ABEJA Platform training entry point: fine-tune SSD300 to one class.

    Loads the dataset from the platform API, does a 70/30 train/test
    split (fixed seed), copies weights from a VOC-pretrained SSD300
    into a 1-class model, and trains with evaluation every epoch.
    `USE_GPU`, `BATCHSIZE`, `nb_epochs`, `log_path` and
    `ABEJA_TRAINING_RESULT_DIR` come from module scope.
    """
    dataset_alias = context.datasets
    data = list(load_dataset_from_api(dataset_alias['train']))

    # Deterministic shuffle, then 70/30 split.
    np.random.seed(0)
    data = np.random.permutation(data)
    nb_data = len(data)
    nb_train = int(7 * nb_data // 10)
    train_data_raw = data[:nb_train]
    test_data_raw = data[nb_train:]

    # Transfer weights from the 20-class VOC model into a 1-class model.
    premodel = SSD300(n_fg_class=20, pretrained_model='voc0712')
    model = SSD300(n_fg_class=1)
    copy_ssd(model, premodel)

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if USE_GPU >= 0:
        chainer.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # Standard SSD recipe: double the gradient on biases, decay the rest.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))
    fix_ssd(train_chain)

    train_data = DetectionDatasetFromAPI(train_data_raw)
    test_data = DetectionDatasetFromAPI(test_data_raw, use_difficult=True,
                                        return_difficult=True)
    train_data = TransformDataset(
        train_data, Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train_data, BATCHSIZE)
    test_iter = chainer.iterators.SerialIterator(
        test_data, BATCHSIZE, repeat=False, shuffle=False)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=USE_GPU)
    trainer = training.Trainer(updater, (nb_epochs, 'epoch'),
                               out=ABEJA_TRAINING_RESULT_DIR)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(
                       [1200, 1600], 'epoch'))
    trainer.extend(DetectionVOCEvaluator(test_iter, model,
                                         use_07_metric=True,
                                         label_names=['cup']),
                   trigger=(1, 'epoch'))

    log_interval = 1, 'epoch'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    print_entries = [
        'epoch', 'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]
    report_entries = [
        'epoch', 'lr', 'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]
    trainer.extend(Statistics(report_entries, nb_epochs),
                   trigger=log_interval)
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(print_entries),
                   trigger=log_interval)
    trainer.extend(
        extensions.snapshot_object(model, 'model_epoch_{.updater.epoch}'),
        trigger=(nb_epochs, 'epoch'))

    trainer.run()
def main():
    """Train SSD300/512 on a jsk_perception learning dataset.

    Dataset directories default to the `jsk_perception` ROS package's
    `learning_datasets/kitchen_dataset` split; output goes to a
    timestamped directory under the ROS home unless --out-dir is given.
    """
    rospack = rospkg.RosPack()
    jsk_perception_datasets_path = osp.join(
        rospack.get_path('jsk_perception'), 'learning_datasets')

    parser = argparse.ArgumentParser()
    # Dataset directory
    parser.add_argument('--train-dataset-dir', type=str,
                        default=osp.join(jsk_perception_datasets_path,
                                         'kitchen_dataset', 'train'))
    parser.add_argument('--val-dataset-dir', type=str,
                        default=osp.join(jsk_perception_datasets_path,
                                         'kitchen_dataset', 'test'))
    parser.add_argument('--dataset-type', type=str, default='instance')
    parser.add_argument(
        '--model-name', choices=('ssd300', 'ssd512'), default='ssd512')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument('--batch-size', type=int, default=8)
    parser.add_argument('--max-epoch', type=int, default=100)
    parser.add_argument('--out-dir', type=str, default=None)
    args = parser.parse_args()

    if (args.dataset_type == 'instance'):
        train_dataset = DetectionDataset(args.train_dataset_dir)
    elif (args.dataset_type == 'bbox'):
        train_dataset = BboxDetectionDataset(args.train_dataset_dir)
    else:
        print('unsuppported dataset type')
        return

    fg_label_names = train_dataset.fg_class_names

    if args.model_name == 'ssd300':
        model = SSD300(
            n_fg_class=len(fg_label_names), pretrained_model='imagenet')
    elif args.model_name == 'ssd512':
        model = SSD512(
            n_fg_class=len(fg_label_names), pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        train_dataset, Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batch_size)

    if (args.dataset_type == 'instance'):
        test_dataset = DetectionDataset(args.val_dataset_dir)
    elif (args.dataset_type == 'bbox'):
        test_dataset = BboxDetectionDataset(args.val_dataset_dir)
    test_iter = chainer.iterators.SerialIterator(
        test_dataset, args.batch_size, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # Standard SSD recipe: double the gradient on biases, decay the rest.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    now = datetime.datetime.now()
    timestamp = now.strftime('%Y%m%d-%H%M%S')
    if args.out_dir is None:
        out_dir = osp.join(
            rospkg.get_ros_home(), 'learning_logs', timestamp)
    else:
        out_dir = args.out_dir

    # Drop LR x0.1 at 2/3 and 5/6 of training; evaluate at those points.
    step_epoch = [args.max_epoch * 2 // 3, args.max_epoch * 5 // 6]
    trainer = training.Trainer(
        updater, (args.max_epoch, 'epoch'), out_dir)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-3),
        trigger=triggers.ManualScheduleTrigger(step_epoch, 'epoch'))
    trainer.extend(
        DetectionVOCEvaluator(
            test_iter, model, use_07_metric=True,
            label_names=fg_label_names),
        trigger=triggers.ManualScheduleTrigger(
            step_epoch + [args.max_epoch], 'epoch'))

    log_interval = 10, 'iteration'
    trainer.extend(
        extensions.LogReport(log_name='log.json', trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
         'main/loss/conf', 'validation/main/map']), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(
        extensions.snapshot_object(model, 'model_snapshot.npz'),
        trigger=(args.max_epoch, 'epoch'))

    trainer.run()
def main():
    """Train SSD300 on a custom VoTT-exported VOC dataset (single GPU).

    All hyperparameters are hard-coded locals; the trained model is
    saved to 'my_ssd_model.npz' at the end.
    """
    # Enable cuDNN autotune
    chainer.cuda.set_max_workspace_size(512 * 1024 * 1024)
    chainer.config.autotune = True

    gpu_id = 0
    batchsize = 6
    out_num = 'results'
    log_interval = 1, 'epoch'
    epoch_max = 500
    initial_lr = 0.0001
    lr_decay_rate = 0.1
    lr_decay_timing = [200, 300, 400]

    # Model setup
    model = SSD300(n_fg_class=len(voc_labels), pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)

    # GPU setup
    chainer.cuda.get_device_from_id(gpu_id).use()
    model.to_gpu()

    # Dataset setup
    # NOTE(review): Windows paths with bare backslashes — none of \P, \c,
    # \T are escape sequences so the values are intact, but raw strings
    # (r'...') would be safer; confirm before changing.
    train_dataset = MyVoTTVOCDataset(
        'C:\Python_Programs\chainer_practice\Telescope_corner', 'train')
    valid_dataset = MyVoTTVOCDataset(
        'C:\Python_Programs\chainer_practice\Telescope_corner', 'val')

    # Data augmentation
    transformed_train_dataset = TransformDataset(
        train_dataset, Transform(model.coder, model.insize, model.mean))

    # Iterator setup
    train_iter = chainer.iterators.MultiprocessIterator(
        transformed_train_dataset, batchsize)
    valid_iter = chainer.iterators.SerialIterator(
        valid_dataset, batchsize, repeat=False, shuffle=False)

    # Optimizer setup: double the gradient on biases, decay the rest.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    # Updater setup
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=gpu_id)

    # Trainer setup
    trainer = training.Trainer(updater, (epoch_max, 'epoch'), out_num)
    trainer.extend(
        extensions.ExponentialShift('lr', lr_decay_rate, init=initial_lr),
        trigger=triggers.ManualScheduleTrigger(lr_decay_timing, 'epoch'))
    trainer.extend(
        DetectionVOCEvaluator(valid_iter, model, use_07_metric=False,
                              label_names=voc_labels),
        trigger=(1, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map', 'elapsed_time'
    ]), trigger=log_interval)
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'main/loss/loc', 'main/loss/conf'],
                'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(['validation/main/map'],
                                  'epoch', file_name='accuracy.png'))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}.npz'),
        trigger=(10, 'epoch'))

    # To resume interrupted training, load a snapshot into the trainer:
    # serializers.load_npz('results/snapshot_epoch_100.npz', trainer)

    # Run training
    trainer.run()

    # Save the trained model (on CPU)
    model.to_cpu()
    serializers.save_npz('my_ssd_model.npz', model)
'loss': loss, 'loss/loc': loc_loss, 'loss/conf': conf_loss }, self) return loss train_chain = MultiboxTrainChain(model) optimizer = chainer.optimizers.MomentumSGD(1e-3) optimizer.setup(train_chain) for param in train_chain.params(): if param.name == 'b': param.update_rule.add_hook(GradientScaling(2)) else: param.update_rule.add_hook(WeightDecay(0.0005)) evaluator = DetectionVOCEvaluator( test_iter, model, use_07_metric=True, label_names=voc_bbox_label_names, ) class save_model(training.Extension): #ver 1.2 def __init__(self, model, save_name, before_iter=0, saved_dir='saved_model/',
def main():
    """Train MaskRCNN-FPN (ResNet50) on YCB-Video data with ChainerMN.

    Seeds from a COCO-pretrained model, freezes BN layers plus the
    extractor and RPN, and keeps the best snapshot by COCO mAP.
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument("--batchsize", type=int, default=16,
                        help="batch size")
    parser.add_argument("--out", default="logs", help="logs")
    parser.add_argument("--resume", help="resume")
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, "set_start_method"):
        multiprocessing.set_start_method("forkserver")
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator("pure_nccl")
    device = comm.intra_rank

    # Class 0 is background; the detector sees foreground classes only.
    class_names = morefusion.datasets.ycb_video.class_names
    fg_class_names = class_names[1:]
    model = MaskRCNNFPNResNet50(
        n_fg_class=len(fg_class_names), pretrained_model="imagenet")
    # Seed with COCO-pretrained weights where shapes match.
    model_coco = MaskRCNNFPNResNet50(pretrained_model="coco")
    _copyparams(model, model_coco)

    model.use_preset("evaluate")
    train_chain = TrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    train_chain.to_gpu()

    # Rank 0 builds the datasets; scatter_dataset distributes them.
    if comm.rank == 0:
        train = chainer.datasets.ConcatenatedDataset(
            morefusion.datasets.YCBVideoInstanceSegmentationDataset(
                split="train", sampling=15),
            morefusion.datasets.YCBVideoSyntheticInstanceSegmentationDataset(
                bg_composite=True),
            morefusion.datasets.MySyntheticYCB20190916InstanceSegmentationDataset(  # NOQA
                "train", bg_composite=True),
        )
        train = transform_dataset(train, model, train=True)
        val = morefusion.datasets.YCBVideoInstanceSegmentationDataset(
            split="keyframe", sampling=1)
        val = transform_dataset(val, model, train=False)
    else:
        train = None
        val = None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    val = chainermn.scatter_dataset(val, comm, shuffle=False)

    train_iter = chainer.iterators.MultiprocessIterator(
        train,
        args.batchsize // comm.size,
        n_processes=args.batchsize // comm.size,
        shared_mem=100 * 1000 * 1000 * 4,
    )
    val_iter = chainer.iterators.MultiprocessIterator(
        val,
        args.batchsize // comm.size,
        n_processes=args.batchsize // comm.size,
        shared_mem=100 * 1000 * 1000 * 4,
        shuffle=False,
        repeat=False,
    )

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    optimizer.add_hook(WeightDecay(0.0001))

    # Freeze BN layers, the backbone extractor, and the RPN;
    # print per-link update status for inspection.
    for link in model.links():
        if isinstance(link, L.BatchNormalization):
            link.disable_update()
    model.extractor.disable_update()
    model.rpn.disable_update()
    for name, link in model.namedlinks():
        print(name, link.update_enabled)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    # 180k iterations at batch 8 over the 118287-image COCO-sized epoch.
    max_epoch = (180e3 * 8) / 118287
    trainer = training.Trainer(updater, (max_epoch, "epoch"), args.out)

    @make_shift("lr")
    def lr_schedule(trainer):
        # Linear warm-up for 500 iterations, then x0.1 step decay at the
        # 120k/160k marks of the 180k-iteration reference schedule.
        base_lr = 0.02 * args.batchsize / 16
        warm_up_duration = 500
        warm_up_rate = 1 / 3
        iteration = trainer.updater.iteration
        if iteration < warm_up_duration:
            rate = (warm_up_rate
                    + (1 - warm_up_rate) * iteration / warm_up_duration)
        else:
            rate = 1
        for step in [120e3 / 180e3 * max_epoch, 160e3 / 180e3 * max_epoch]:
            if trainer.updater.epoch_detail >= step:
                rate *= 0.1
        return base_lr * rate

    trainer.extend(lr_schedule)

    val_interval = 10000, "iteration"
    evaluator = InstanceSegmentationCOCOEvaluator(val_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)

    # Reporting and snapshots only on the first rank.
    if comm.rank == 0:
        log_interval = 10, "iteration"
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        keys = [
            "epoch",
            "iteration",
            "lr",
            "main/loss",
            "main/loss/rpn/loc",
            "main/loss/rpn/conf",
            "main/loss/bbox_head/loc",
            "main/loss/bbox_head/conf",
            "main/loss/mask_head",
            "validation/main/map/iou=0.50:0.95/area=all/max_dets=100",
        ]
        trainer.extend(extensions.PrintReport(keys), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        # trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        trainer.extend(
            extensions.snapshot_object(model, "model_iter_best"),
            trigger=training.triggers.MaxValueTrigger(
                "validation/main/map/iou=0.50:0.95/area=all/max_dets=100",
                trigger=val_interval,
            ),
        )
        trainer.extend(
            extensions.snapshot_object(
                model, "model_iter_{.updater.iteration}"),
            trigger=(max_epoch, "epoch"),
        )

    if args.resume:
        serializers.load_npz(args.resume, trainer, strict=False)

    trainer.run()
def main():
    """Train SSD300/512 on VOC07+12 with adaptive loss scaling (AdaLoss)
    and ChainerMN multi-node data parallelism.

    Wraps the model in AdaLossScaled for reduced-precision training,
    records loss-scale/profile/sanity statistics, and exports them as
    CSV files on rank 0 after training.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--np', type=int, default=8)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--dtype', type=str, choices=dtypes.keys(),
                        default='float32',
                        help='Select the data type of the model')
    parser.add_argument('--model-dir', default=None, type=str,
                        help='Where to store models')
    parser.add_argument('--dataset-dir', default=None, type=str,
                        help='Where to store datasets')
    parser.add_argument('--dynamic-interval', default=None, type=int,
                        help='Interval for dynamic loss scaling')
    parser.add_argument('--init-scale', default=1, type=float,
                        help='Initial scale for ada loss')
    parser.add_argument('--loss-scale-method', default='approx_range',
                        type=str, help='Method for adaptive loss scaling')
    parser.add_argument('--scale-upper-bound', default=16, type=float,
                        help='Hard upper bound for each scale factor')
    parser.add_argument('--accum-upper-bound', default=1024, type=float,
                        help='Accumulated upper bound for all scale factors')
    parser.add_argument('--update-per-n-iteration', default=1, type=int,
                        help='Update the loss scale value per n iteration')
    parser.add_argument('--snapshot-per-n-iteration', default=10000,
                        type=int, help='The frequency of taking snapshots')
    parser.add_argument('--n-uf', default=1e-3, type=float)
    parser.add_argument('--nosanity-check', default=False,
                        action='store_true')
    parser.add_argument('--nouse-fp32-update', default=False,
                        action='store_true')
    parser.add_argument('--profiling', default=False, action='store_true')
    parser.add_argument('--verbose', action='store_true', default=False,
                        help='Verbose output')
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator('pure_nccl')
    device = comm.intra_rank

    # Set up workspace (GPU RAM reserved for cuDNN workspace)
    chainer.cuda.set_max_workspace_size(16 * 1024 * 1024 * 1024)
    chainer.global_config.cv_resize_backend = 'cv2'

    # Setup the data type: when initializing models as follows, their data
    # types will be casted.  We have to forbid the usage of cuDNN for
    # non-float32 dtypes.
    if args.dtype != 'float32':
        chainer.global_config.use_cudnn = 'never'
    chainer.global_config.dtype = dtypes[args.dtype]
    print('==> Setting the data type to {}'.format(args.dtype))

    if args.model_dir is not None:
        chainer.dataset.set_dataset_root(args.model_dir)
    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    model.use_preset('evaluate')

    ######################################
    # Setup model
    #######################################
    # Apply ada loss transform
    recorder = AdaLossRecorder(sample_per_n_iter=100)
    profiler = Profiler()
    sanity_checker = SanityChecker(
        check_per_n_iter=100) if not args.nosanity_check else None
    # Update the model to support AdaLoss
    # TODO: refactorize
    model_ = AdaLossScaled(
        model,
        init_scale=args.init_scale,
        cfg={
            'loss_scale_method': args.loss_scale_method,
            'scale_upper_bound': args.scale_upper_bound,
            'accum_upper_bound': args.accum_upper_bound,
            'update_per_n_iteration': args.update_per_n_iteration,
            'recorder': recorder,
            'profiler': profiler,
            'sanity_checker': sanity_checker,
            'n_uf_threshold': args.n_uf,
            # 'power_of_two': False,
        },
        transforms=[
            AdaLossTransformLinear(),
            AdaLossTransformConvolution2D(),
        ],
        verbose=args.verbose)

    if comm.rank == 0:
        print(model)

    train_chain = MultiboxTrainChain(model_, comm=comm)
    chainer.cuda.get_device_from_id(device).use()

    # to GPU
    model.coder.to_gpu()
    model.extractor.to_gpu()
    model.multibox.to_gpu()

    shared_mem = 100 * 1000 * 1000 * 4

    if args.dataset_dir is not None:
        chainer.dataset.set_dataset_root(args.dataset_dir)
    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder, model.insize, model.mean,
                  dtype=dtypes[args.dtype]))

    # Rank 0 owns the full index list; scatter_dataset splits it per rank.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize // comm.size,
        n_processes=8, n_prefetch=2, shared_mem=shared_mem)

    if comm.rank == 0:
        # NOTE: only performed on the first device
        test = VOCBboxDataset(year='2007', split='test',
                              use_difficult=True, return_difficult=True)
        test_iter = chainer.iterators.SerialIterator(
            test, args.test_batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    if args.dtype == 'mixed16':
        if not args.nouse_fp32_update:
            print('==> Using FP32 update for dtype=mixed16')
            optimizer.use_fp32_update()  # by default use fp32 update
        # HACK: support skipping update by existing loss scaling
        # functionality
        if args.dynamic_interval is not None:
            optimizer.loss_scaling(interval=args.dynamic_interval,
                                   scale=None)
        else:
            optimizer.loss_scaling(interval=float('inf'), scale=None)
            optimizer._loss_scale_max = 1.0  # to prevent actual loss scaling
    optimizer.setup(train_chain)
    # Standard SSD recipe: double the gradient on biases, decay the rest.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    # if args.dtype == 'mixed16':
    #     updater.loss_scale = 8
    iteration_interval = (args.iteration, 'iteration')

    trainer = training.Trainer(updater, iteration_interval, args.out)
    # trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr),
    #                trigger=triggers.ManualScheduleTrigger(
    #                    args.step, 'iteration'))
    if args.batchsize != 32:
        warmup_attr_ratio = 0.1
        # NOTE: this is confusing but it means n_iter
        warmup_n_epoch = 1000
        lr_shift = chainerlp.extensions.ExponentialShift(
            'lr', 0.1,
            init=args.lr * warmup_attr_ratio,
            warmup_attr_ratio=warmup_attr_ratio,
            warmup_n_epoch=warmup_n_epoch,
            schedule=args.step)
        trainer.extend(lr_shift, trigger=(1, 'iteration'))

    # Evaluation, reporting, and snapshots only on the first rank.
    if comm.rank == 0:
        if not args.profiling:
            trainer.extend(
                DetectionVOCEvaluator(test_iter, model, use_07_metric=True,
                                      label_names=voc_bbox_label_names),
                trigger=triggers.ManualScheduleTrigger(
                    args.step + [args.iteration], 'iteration'))

        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.observe_value(
                'loss_scale',
                lambda trainer: trainer.updater.get_optimizer(
                    'main')._loss_scale),
            trigger=log_interval)

        metrics = [
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map'
        ]
        if args.dynamic_interval is not None:
            metrics.insert(2, 'loss_scale')
        trainer.extend(extensions.PrintReport(metrics),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(),
                       trigger=(args.snapshot_per_n_iteration, 'iteration'))
        trainer.extend(
            extensions.snapshot_object(
                model, 'model_iter_{.updater.iteration}'),
            trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    hook = AdaLossMonitor(sample_per_n_iter=100,
                          verbose=args.verbose,
                          includes=['Grad', 'Deconvolution'])
    recorder.trainer = trainer
    hook.trainer = trainer

    # Only rank 0 runs with the monitor hook installed.
    with ExitStack() as stack:
        if comm.rank == 0:
            stack.enter_context(hook)
        trainer.run()

    # store recorded results
    if comm.rank == 0:
        # NOTE: only export in the first rank
        recorder.export().to_csv(os.path.join(args.out, 'loss_scale.csv'))
        profiler.export().to_csv(os.path.join(args.out, 'profile.csv'))
        if sanity_checker:
            sanity_checker.export().to_csv(
                os.path.join(args.out, 'sanity_check.csv'))
        hook.export_history().to_csv(
            os.path.join(args.out, 'grad_stats.csv'))
def main():
    """Train an ImageNet classifier with ChainerMN and adaptive loss scaling.

    Distributed training entry point: sets up a ChainerMN communicator,
    scatters the dataset across ranks, wraps the network in ``AdaLossScaled``
    for adaptive loss scaling, and runs a Trainer with warmup + step LR decay.

    NOTE(review): reads ``args`` from module scope (never passed in or parsed
    here) — confirm the module parses command-line args at import time.
    """
    # Start the multiprocessing environment
    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        # Dummy process forces the forkserver to start before CUDA init.
        p = multiprocessing.Process()
        p.start()
        p.join()

    # Set up workspace
    # 12 GB GPU RAM for workspace
    chainer.cuda.set_max_workspace_size(16 * 1024 * 1024 * 1024)

    # Setup the multi-node environment
    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank  # one GPU per process within a node
    print(
        '==> Successfully setup communicator: "{}" rank: {} device: {} size: {}'
        .format(args.communicator, comm.rank, device, comm.size))
    set_random_seed(args, device)

    # Setup LR: explicit --lr wins; otherwise apply the linear scaling rule
    # (Goyal et al., https://arxiv.org/abs/1706.02677).
    if args.lr is not None:
        lr = args.lr
    else:
        lr = 0.1 * (args.batchsize * comm.size) / 256  # TODO: why?
        if comm.rank == 0:
            print(
                'LR = {} is selected based on the linear scaling rule'.format(
                    lr))

    # Setup dataset (directory-per-class layout)
    train_dir = os.path.join(args.dataset_dir, 'train')
    val_dir = os.path.join(args.dataset_dir, 'val')
    label_names = datasets.directory_parsing_label_names(train_dir)
    train_data = datasets.DirectoryParsingLabelDataset(train_dir)
    val_data = datasets.DirectoryParsingLabelDataset(val_dir)
    train_data = TransformDataset(train_data, ('img', 'label'),
                                  TrainTransform(_mean, args))
    val_data = TransformDataset(val_data, ('img', 'label'),
                                ValTransform(_mean, args))
    print('==> [{}] Successfully finished loading dataset'.format(comm.rank))

    # Initializing dataset iterators: rank 0 builds the index arrays and
    # scatter_dataset distributes disjoint slices to every rank.
    if comm.rank == 0:
        train_indices = np.arange(len(train_data))
        val_indices = np.arange(len(val_data))
    else:
        train_indices = None
        val_indices = None
    train_indices = chainermn.scatter_dataset(train_indices,
                                              comm,
                                              shuffle=True)
    val_indices = chainermn.scatter_dataset(val_indices, comm, shuffle=True)
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(val_data,
                                              args.batchsize,
                                              repeat=False,
                                              shuffle=False,
                                              n_processes=args.loaderjob)

    # Create the model
    kwargs = {}
    if args.first_bn_mixed16 and args.dtype == 'float16':
        print('==> Setting the first BN layer to mixed16')
        kwargs['first_bn_mixed16'] = True

    # Initialize the model
    net = models.__dict__[args.arch](n_class=len(label_names), **kwargs)
    # Following https://arxiv.org/pdf/1706.02677.pdf,
    # the gamma of the last BN of each resblock is initialized by zeros.
    for l in net.links():
        if isinstance(l, Bottleneck):
            l.conv3.bn.gamma.data[:] = 0

    # Apply ada loss transform
    recorder = AdaLossRecorder(sample_per_n_iter=100)
    # Update the model to support AdaLoss
    net = AdaLossScaled(net,
                        init_scale=args.init_scale,
                        cfg={
                            'loss_scale_method': args.loss_scale_method,
                            'scale_upper_bound': args.scale_upper_bound,
                            'accum_upper_bound': args.accum_upper_bound,
                            'update_per_n_iteration':
                            args.update_per_n_iteration,
                            'recorder': recorder,
                        },
                        transforms=[
                            AdaLossTransformLinear(),
                            AdaLossTransformBottleneck(),
                            AdaLossTransformBasicBlock(),
                            AdaLossTransformConv2DBNActiv(),
                        ],
                        verbose=args.verbose)

    if comm.rank == 0:  # print network only in the 1-rank machine
        print(net)
    net = L.Classifier(net)

    hook = AdaLossMonitor(sample_per_n_iter=100,
                          verbose=args.verbose,
                          includes=['Grad', 'Deconvolution'])

    # Setup optimizer (multi-node wrapper handles gradient all-reduce)
    optim = chainermn.create_multi_node_optimizer(
        optimizers.CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm)
    if args.dtype == 'mixed16':
        print('==> Using FP32 update for dtype=mixed16')
        optim.use_fp32_update()  # by default use fp32 update
        # HACK: support skipping update by existing loss scaling functionality
        if args.dynamic_interval is not None:
            optim.loss_scaling(interval=args.dynamic_interval, scale=None)
        else:
            optim.loss_scaling(interval=float('inf'), scale=None)
            optim._loss_scale_max = 1.0  # to prevent actual loss scaling
    optim.setup(net)

    # setup weight decay — skip BN scale/shift parameters (beta, gamma)
    for param in net.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    # allocate model to multiple GPUs
    if device >= 0:
        chainer.cuda.get_device(device).use()
        net.to_gpu()

    # Create an updater that implements how to update based on one train_iter input
    updater = chainer.training.StandardUpdater(train_iter,
                                               optim,
                                               device=device)

    # Setup Trainer: --iter (iteration-based) overrides --epoch
    stop_trigger = (args.epoch, 'epoch')
    if args.iter is not None:
        stop_trigger = (args.iter, 'iteration')
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    @make_shift('lr')
    def warmup_and_exponential_shift(trainer):
        """ LR schedule for training ResNet especially.

        NOTE: lr should be within the context.
        """
        epoch = trainer.updater.epoch_detail
        warmup_epoch = 5  # NOTE: mentioned the original ResNet paper.
        if epoch < warmup_epoch:
            if lr > 0.1:
                # Linearly ramp from 0.1 up to the scaled LR over warmup.
                warmup_rate = 0.1 / lr
                rate = warmup_rate \
                    + (1 - warmup_rate) * epoch / warmup_epoch
            else:
                rate = 1
        elif epoch < 30:
            rate = 1
        elif epoch < 60:
            rate = 0.1
        elif epoch < 80:
            rate = 0.01
        else:
            rate = 0.001
        return rate * lr

    trainer.extend(warmup_and_exponential_shift)
    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(val_iter, net, device=device), comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    # Reporting extensions run only on rank 0 to avoid duplicated output.
    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        print('Using {} communicator'.format(args.communicator))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)

        # NOTE: may take snapshot every iteration now
        snapshot_label = 'epoch' if args.iter is None else 'iteration'
        snapshot_trigger = (args.snapshot_freq, snapshot_label)
        snapshot_filename = ('snapshot_' + snapshot_label + '_{.updater.'
                             + snapshot_label + '}.npz')
        trainer.extend(extensions.snapshot(filename=snapshot_filename),
                       trigger=snapshot_trigger)

        trainer.extend(extensions.LogReport(trigger=log_interval))
        # _loss_scale is a private optimizer attribute; exposed for logging.
        trainer.extend(extensions.observe_value(
            'loss_scale',
            lambda trainer: trainer.updater.get_optimizer('main')._loss_scale),
            trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr', 'loss_scale',
            'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy'
        ]), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    recorder.trainer = trainer
    hook.trainer = trainer
    with ExitStack() as stack:
        # Only monitor gradients on rank 0; other ranks run unhooked.
        if comm.rank == 0:
            stack.enter_context(hook)
        trainer.run()

    # store recorded results
    if comm.rank == 0:  # NOTE: only export in the first rank
        recorder.export().to_csv(os.path.join(args.out, 'loss_scale.csv'))
        hook.export_history().to_csv(os.path.join(args.out,
                                                  'grad_stats.csv'))
def main():
    """Train a TCC (temporal cycle consistency) embedding model.

    Selects one of three video datasets from ``CONFIG.dataset``, builds
    train/test iterators, wraps the ``tcc`` model with an optimizer and
    weight decay, and runs a Chainer Trainer with plotting/snapshot
    extensions.

    NOTE(review): relies on module-level ``CONFIG``, ``OPTION`` and
    ``output_dir`` — confirm they are initialized before this is called.
    """
    # chainer.config.autotune = True
    # chainer.config.cudnn_fast_batch_normalization = True
    print("dataset", CONFIG.dataset)
    print("output_dir:", output_dir)

    # Dataset selection; tennis_serve is split by a fixed index (115),
    # the other loaders return (train, test) pairs themselves.
    if CONFIG.dataset == "tennis_serve":
        dataset = load_penn_action(dataset_dir=CONFIG.dataset_path,
                                   stride=CONFIG.penn_action.stride,
                                   dict_ok=False)
        dataset_train = dataset[:115]
        dataset_test = dataset[115:]
    elif CONFIG.dataset == "pouring":
        dataset_train, dataset_test = load_pouring(
            dataset_dir=CONFIG.dataset_path,
            stride=CONFIG.pouring.stride,
            dict_ok=False)
    elif CONFIG.dataset == "multiview_pouring":
        dataset_train, dataset_test = load_multiview_pouring(
            dataset_dir=CONFIG.dataset_path,
            stride=CONFIG.multiview_pouring.stride,
            dict_ok=False)
    else:
        print("dataset error.")
        exit()

    dataset_train = load_dataset(dataset_train,
                                 augment=None,
                                 img_size=CONFIG.img_size,
                                 k=CONFIG.k)
    dataset_test = load_dataset(dataset_test,
                                augment=None,
                                img_size=CONFIG.img_size,
                                k=CONFIG.k)

    train_iter = MultiprocessIterator(dataset_train,
                                      batch_size=CONFIG.batchsize,
                                      n_processes=6)
    # Test iterator: batch size 1, single pass, original order.
    test_iter = MultiprocessIterator(dataset_test,
                                     batch_size=1,
                                     n_processes=6,
                                     repeat=False,
                                     shuffle=None)

    model = tcc(use_bn=True, k=CONFIG.k)
    device = chainer.get_device(OPTION.device)
    device.use()
    model.to_device(device)

    optimizer = make_optimizer(model)
    # Per-parameter weight decay hook (skipped entirely when rate is 0).
    if CONFIG.weight_decay_rate != 0:
        for param in model.params():
            param.update_rule.add_hook(WeightDecay(CONFIG.weight_decay_rate))

    updater = tcc_updater({"main": train_iter}, optimizer, device)

    trainer = Trainer(updater, (CONFIG.iteration, 'iteration'),
                      out=output_dir)

    display_interval = (100, 'iteration')
    plot_interval = (100, 'iteration')
    trainer.extend(extensions.ProgressBar(update_interval=5))
    trainer.extend(
        extensions.LogReport(trigger=display_interval, filename='log.txt'))
    trainer.extend(extensions.PrintReport(
        ["iteration", "main/loss", "test/loss", "test/tau", "elapsed_time"]),
        trigger=display_interval)
    trainer.extend(extensions.PlotReport(["main/loss", "test/loss"],
                                         "iteration",
                                         file_name="loss.png"),
                   trigger=plot_interval)
    # Evaluator also reports Kendall's tau ("test/tau") for the embedding.
    trainer.extend(evaluator(test_iter,
                             model,
                             device,
                             epoch=plot_interval[0],
                             out=output_dir),
                   trigger=plot_interval)
    trainer.extend(extensions.PlotReport(["test/tau"],
                                         "iteration",
                                         file_name="tau.png"),
                   trigger=plot_interval)
    trainer.extend(extensions.snapshot_object(model,
                                              "{.updater.iteration}" + ".npz"),
                   trigger=plot_interval)
    trainer.run()
def main():
    """Multi-node training of a Faster R-CNN FPN detector on COCO 2017.

    Uses ChainerMN to scatter the dataset across ranks; hyper-parameters
    (LR, iteration counts, step schedule) are rescaled from the reference
    16-image batch via the ``* 16 / args.batchsize`` factors.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model',
        choices=('faster_rcnn_fpn_resnet50', 'faster_rcnn_fpn_resnet101'),
        default='faster_rcnn_fpn_resnet50')
    parser.add_argument('--batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=90000)
    parser.add_argument('--step', type=int, nargs='*',
                        default=[60000, 80000])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator()
    device = comm.intra_rank  # one GPU per process within the node

    if args.model == 'faster_rcnn_fpn_resnet50':
        model = FasterRCNNFPNResNet50(
            n_fg_class=len(coco_bbox_label_names),
            pretrained_model='imagenet')
    elif args.model == 'faster_rcnn_fpn_resnet101':
        model = FasterRCNNFPNResNet101(
            n_fg_class=len(coco_bbox_label_names),
            pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = TrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    train_chain.to_gpu()

    train = TransformDataset(
        COCOBboxDataset(year='2017', split='train'),
        ('img', 'bbox', 'label'), transform)

    # Rank 0 builds the shuffled index array; scatter gives each rank a slice.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # Per-rank batch size so the global batch equals args.batchsize.
    train_iter = chainer.iterators.MultithreadIterator(
        train, args.batchsize // comm.size)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    optimizer.add_hook(WeightDecay(0.0001))

    # Freeze the stem, the first residual stage, and all BN layers
    # (standard practice when fine-tuning detection backbones).
    model.extractor.base.conv1.disable_update()
    model.extractor.base.res2.disable_update()
    for link in model.links():
        if isinstance(link, L.BatchNormalization):
            link.disable_update()

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    # NOTE(review): iteration count is a float when batchsize doesn't divide
    # iteration*16 — Chainer triggers accept this, but confirm intent.
    trainer = training.Trainer(
        updater, (args.iteration * 16 / args.batchsize, 'iteration'),
        args.out)

    @make_shift('lr')
    def lr_schedule(trainer):
        # Linear warmup for 500 iterations, then step decay at args.step
        # (steps rescaled by the batch-size factor).
        base_lr = 0.02 * args.batchsize / 16
        warm_up_duration = 500
        warm_up_rate = 1 / 3

        iteration = trainer.updater.iteration
        if iteration < warm_up_duration:
            rate = warm_up_rate \
                + (1 - warm_up_rate) * iteration / warm_up_duration
        else:
            rate = 1
        for step in args.step:
            if iteration >= step * 16 / args.batchsize:
                rate *= 0.1

        return base_lr * rate

    trainer.extend(lr_schedule)

    # Reporting/snapshot extensions only on rank 0 to avoid duplication.
    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(
            ['epoch', 'iteration', 'lr', 'main/loss',
             'main/loss/rpn/loc', 'main/loss/rpn/conf',
             'main/loss/head/loc', 'main/loss/head/conf']),
            trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        trainer.extend(
            extensions.snapshot_object(
                model, 'model_iter_{.updater.iteration}'),
            trigger=(90000 * 16 / args.batchsize, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer, strict=False)

    trainer.run()
def train(opt):
    """Adversarial semi-supervised segmentation training loop.

    Builds a generator (segmentation net) and discriminator (FCN), their
    Adam optimizers, and an ``AdvSemiSeg_Updater`` fed by an annotated and
    an unlabeled iterator, then runs a Trainer with plotting, snapshot and
    scheduling extensions.

    Args:
        opt: options object; fields read here include ``use_cpu``,
            ``batch_size``, learning rates/betas, ``g_weight_decay``,
            ``max_epoch``, ``out_dir`` and the interval settings.
    """
    if opt.use_cpu:
        device = -1
        print('[Message] use CPU')
    else:
        device = 0
        print('[Message] use GPU0')

    annotated = get_dataset(opt)
    unlabeled = get_unlabel_dataset(opt)
    print('[Message] loaded options')

    train_iter = SerialIterator(annotated, opt.batch_size, shuffle=True)
    print('[Message] converted to iterator (train)')
    semi_iter = SerialIterator(unlabeled, opt.batch_size, shuffle=True)
    print('[Message] converted to iterator (semi)')

    # Generator: alternative architectures kept for experimentation.
    gen = ResNetDeepLab(opt)
    #gen = DilatedFCN(opt)
    #gen = UNet(opt)
    if device != -1:
        gen.to_gpu(device)  #use GPU
    g_optim = Adam(alpha=opt.g_lr, beta1=opt.g_beta1, beta2=opt.g_beta2)
    g_optim.setup(gen)
    if opt.g_weight_decay > 0:
        g_optim.add_hook(WeightDecay(opt.g_weight_decay))
    print('[Message] setuped Generator')

    # Discriminator (no weight decay applied here, unlike the generator).
    dis = FCN(opt)
    if device != -1:
        dis.to_gpu(device)  #use GPU
    d_optim = Adam(alpha=opt.d_lr, beta1=opt.d_beta1, beta2=opt.d_beta2)
    d_optim.setup(dis)
    print('[Message] setuped Discriminator')

    updater = AdvSemiSeg_Updater(opt,
                                 iterator={
                                     'main': train_iter,
                                     'semi': semi_iter
                                 },
                                 optimizer={
                                     'gen': g_optim,
                                     'dis': d_optim
                                 },
                                 device=device)
    print('[Message] initialized Updater')

    trainer = Trainer(updater, (opt.max_epoch, 'epoch'), out=opt.out_dir)
    print('[Message] initialized Trainer')

    #chainer training extensions
    trainer.extend(ex.LogReport(log_name=None, trigger=(1, 'iteration')))
    trainer.extend(ex.ProgressBar((opt.max_epoch, 'epoch'),
                                  update_interval=1))
    trainer.extend(
        ex.PlotReport(['gen/adv_loss', 'dis/adv_loss', 'gen/semi_adv_loss'],
                      x_key='iteration',
                      file_name='adversarial_loss.png',
                      trigger=(100, 'iteration')))
    #test
    trainer.extend(
        ex.PlotReport(['gen/adv_loss'],
                      x_key='iteration',
                      file_name='adv_gen_loss.png',
                      trigger=(100, 'iteration')))
    trainer.extend(
        ex.PlotReport(['gen/ce_loss'],
                      x_key='iteration',
                      file_name='cross_entropy_loss.png',
                      trigger=(100, 'iteration')))
    trainer.extend(
        ex.PlotReport(['gen/semi_st_loss'],
                      x_key='iteration',
                      file_name='self_teach_loss.png',
                      trigger=(100, 'iteration')))
    trainer.extend(
        ex.PlotReport(['gen/loss', 'dis/loss', 'gen/semi_loss'],
                      x_key='iteration',
                      file_name='loss.png',
                      trigger=(100, 'iteration')))
    trainer.extend(
        ex.PlotReport(['gen/loss', 'dis/loss', 'gen/semi_loss'],
                      x_key='epoch',
                      file_name='loss_details.png',
                      trigger=(5, 'epoch')))
    trainer.extend(
        ex.PlotReport(['gen/semi_loss'],
                      x_key='epoch',
                      file_name='semi_loss.png',
                      trigger=(1, 'epoch')))

    #snap
    trainer.extend(ex.snapshot_object(
        gen, 'gen_snapshot_epoch-{.updater.epoch}.npz'),
        trigger=(opt.snap_interval_epoch, 'epoch'))
    trainer.extend(ex.snapshot_object(
        dis, 'dis_snapshot_epoch-{.updater.epoch}.npz'),
        trigger=(opt.snap_interval_epoch, 'epoch'))
    # PRIORITY_READER: these callbacks run before other extensions so they
    # observe the updater state of the just-finished iteration.
    trainer.extend(lambda *args: updater.save_img(),
                   trigger=(opt.img_interval_iteration, 'iteration'),
                   priority=PRIORITY_READER)
    trainer.extend(lambda *args: updater.ignition_semi_learning(),
                   trigger=(opt.semi_ignit_iteration, 'iteration'),
                   priority=PRIORITY_READER)
    # Polynomial LR decay applied to both Adam optimizers every 100 iters.
    trainer.extend(lambda *args: adam_lr_poly(opt, trainer),
                   trigger=(100, 'iteration'))
    print('[Message] initialized extension')
    print('[Message] start training ...')
    trainer.run()  #start learning
def main():
    """Train an SSD detector on VOC with adaptive loss scaling (AdaLoss).

    Wraps the SSD model in ``AdaLossScaled``, optionally configures
    mixed-precision (mixed16) training with FP32 updates and a loss-scaling
    HACK that disables actual scaling, then runs the standard ChainerCV SSD
    training schedule and exports the recorded statistics as CSV.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--dtype',
                        type=str,
                        choices=dtypes.keys(),
                        default='float32',
                        help='Select the data type of the model')
    parser.add_argument('--model-dir',
                        default=None,
                        type=str,
                        help='Where to store models')
    parser.add_argument('--dataset-dir',
                        default=None,
                        type=str,
                        help='Where to store datasets')
    parser.add_argument('--dynamic-interval',
                        default=None,
                        type=int,
                        help='Interval for dynamic loss scaling')
    parser.add_argument('--init-scale',
                        default=1,
                        type=float,
                        help='Initial scale for ada loss')
    parser.add_argument('--loss-scale-method',
                        default='approx_range',
                        type=str,
                        help='Method for adaptive loss scaling')
    parser.add_argument('--scale-upper-bound',
                        default=32800,
                        type=float,
                        help='Hard upper bound for each scale factor')
    parser.add_argument('--accum-upper-bound',
                        default=32800,
                        type=float,
                        help='Accumulated upper bound for all scale factors')
    parser.add_argument('--update-per-n-iteration',
                        default=100,
                        type=int,
                        help='Update the loss scale value per n iteration')
    parser.add_argument('--snapshot-per-n-iteration',
                        default=10000,
                        type=int,
                        help='The frequency of taking snapshots')
    parser.add_argument('--n-uf', default=1e-3, type=float)
    parser.add_argument('--nosanity-check', default=False,
                        action='store_true')
    parser.add_argument('--nouse-fp32-update', default=False,
                        action='store_true')
    parser.add_argument('--profiling', default=False, action='store_true')
    parser.add_argument('--verbose',
                        action='store_true',
                        default=False,
                        help='Verbose output')
    args = parser.parse_args()

    # Setting data types — cuDNN is disabled for non-float32 runs because
    # the custom dtype path is not supported by it.
    if args.dtype != 'float32':
        chainer.global_config.use_cudnn = 'never'
        chainer.global_config.dtype = dtypes[args.dtype]
        print('==> Setting the data type to {}'.format(args.dtype))

    # Initialize model
    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')

    # Apply adaptive loss scaling
    recorder = AdaLossRecorder(sample_per_n_iter=100)
    profiler = Profiler()
    sanity_checker = SanityChecker(
        check_per_n_iter=100) if not args.nosanity_check else None
    # Update the model to support AdaLoss
    # TODO: refactorize
    model_ = AdaLossScaled(
        model,
        init_scale=args.init_scale,
        cfg={
            'loss_scale_method': args.loss_scale_method,
            'scale_upper_bound': args.scale_upper_bound,
            'accum_upper_bound': args.accum_upper_bound,
            'update_per_n_iteration': args.update_per_n_iteration,
            'recorder': recorder,
            'profiler': profiler,
            'sanity_checker': sanity_checker,
            'n_uf_threshold': args.n_uf,
        },
        transforms=[
            AdaLossTransformLinear(),
            AdaLossTransformConvolution2D(),
        ],
        verbose=args.verbose)

    # Finalize the model
    train_chain = MultiboxTrainChain(model_)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        cp.random.seed(0)

        # NOTE: we have to transfer modules explicitly to GPU
        model.coder.to_gpu()
        model.extractor.to_gpu()
        model.multibox.to_gpu()

    # Prepare dataset
    if args.model_dir is not None:
        chainer.dataset.set_dataset_root(args.model_dir)
    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        Transform(model.coder, model.insize, model.mean,
                  dtype=dtypes[args.dtype]))
    # train_iter = chainer.iterators.MultiprocessIterator(
    #     train, args.batchsize)  # , n_processes=8, n_prefetch=2)
    train_iter = chainer.iterators.MultithreadIterator(train, args.batchsize)
    # train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    test = VOCBboxDataset(year='2007',
                          split='test',
                          use_difficult=True,
                          return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.test_batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    if args.dtype == 'mixed16':
        if not args.nouse_fp32_update:
            print('==> Using FP32 update for dtype=mixed16')
            optimizer.use_fp32_update()  # by default use fp32 update

        # HACK: support skipping update by existing loss scaling functionality
        if args.dynamic_interval is not None:
            optimizer.loss_scaling(interval=args.dynamic_interval, scale=None)
        else:
            optimizer.loss_scaling(interval=float('inf'), scale=None)
            optimizer._loss_scale_max = 1.0  # to prevent actual loss scaling

    optimizer.setup(train_chain)
    # Per-parameter hooks: biases get gradient scaling, weights get decay.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step, 'iteration'))

    trainer.extend(DetectionVOCEvaluator(test_iter,
                                         model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    # _loss_scale is a private optimizer attribute; exposed for logging.
    trainer.extend(extensions.observe_value(
        'loss_scale',
        lambda trainer: trainer.updater.get_optimizer('main')._loss_scale),
        trigger=log_interval)

    metrics = [
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]
    if args.dynamic_interval is not None:
        metrics.insert(2, 'loss_scale')

    trainer.extend(extensions.PrintReport(metrics), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    hook = AdaLossMonitor(sample_per_n_iter=100,
                          verbose=args.verbose,
                          includes=['Grad', 'Deconvolution'])
    recorder.trainer = trainer
    hook.trainer = trainer

    with ExitStack() as stack:
        stack.enter_context(hook)
        trainer.run()

    # Export the recorded statistics for offline analysis.
    recorder.export().to_csv(os.path.join(args.out, 'loss_scale.csv'))
    profiler.export().to_csv(os.path.join(args.out, 'profile.csv'))
    if sanity_checker:
        sanity_checker.export().to_csv(
            os.path.join(args.out, 'sanity_check.csv'))
    hook.export_history().to_csv(os.path.join(args.out, 'grad_stats.csv'))
def main():
    """Train an SSD detector (single foreground class) on a sheep dataset.

    Supports single- and multi-GPU training (MultiprocessParallelUpdater),
    periodic VOC-style evaluation, and live bounding-box visualization sent
    over the network via ``BBOXPlotter``.

    Bug fix: ``backup_train_config`` previously compared the integer
    ``stats_cpu['iteration']`` against the tuple ``log_interval``
    (``(100, 'iteration')``) — always False, so the auxiliary training
    configuration was never written to the log. It now compares against the
    interval's period, ``log_interval[0]``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('dataset', help="path to train json file")
    parser.add_argument('test_dataset', help="path to test dataset json file")
    parser.add_argument(
        '--dataset-root',
        help=
        "path to dataset root if dataset file is not already in root folder of dataset"
    )
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd512')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--gpu', type=int, nargs='*', default=[])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    parser.add_argument('--lr',
                        type=float,
                        default=0.001,
                        help="default learning rate")
    parser.add_argument('--port',
                        type=int,
                        default=1337,
                        help="port for bbox sending")
    parser.add_argument('--ip',
                        default='127.0.0.1',
                        help="destination ip for bbox sending")
    parser.add_argument(
        '--test-image',
        help="path to test image that shall be displayed in bbox vis")

    args = parser.parse_args()

    # Default the dataset root to the directory containing the JSON file.
    if args.dataset_root is None:
        args.dataset_root = os.path.dirname(args.dataset)

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=1, pretrained_model='imagenet')
        image_size = (300, 300)
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=1, pretrained_model='imagenet')
        image_size = (512, 512)
    else:
        raise NotImplementedError("The model you want to train does not exist")

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)

    train = TransformDataset(
        SheepDataset(args.dataset_root, args.dataset, image_size=image_size),
        Transform(model.coder, model.insize, model.mean))

    # For multi-GPU training every worker needs an equally-sized split;
    # trim the last split if the random partition left it larger.
    if len(args.gpu) > 1:
        gpu_datasets = split_dataset_n_random(train, len(args.gpu))
        if not len(gpu_datasets[0]) == len(gpu_datasets[-1]):
            adapted_second_split = split_dataset(gpu_datasets[-1],
                                                 len(gpu_datasets[0]))[0]
            gpu_datasets[-1] = adapted_second_split
    else:
        gpu_datasets = [train]

    train_iter = [
        ThreadIterator(gpu_dataset, args.batchsize)
        for gpu_dataset in gpu_datasets
    ]

    test = SheepDataset(args.dataset_root,
                        args.test_dataset,
                        image_size=image_size)
    test_iter = chainer.iterators.MultithreadIterator(test,
                                                      args.batchsize,
                                                      repeat=False,
                                                      shuffle=False,
                                                      n_threads=2)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.Adam(alpha=args.lr)
    optimizer.setup(train_chain)
    # Per-parameter hooks: biases get gradient scaling, weights get decay.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    if len(args.gpu) <= 1:
        updater = training.updaters.StandardUpdater(
            train_iter[0],
            optimizer,
            device=args.gpu[0] if len(args.gpu) > 0 else -1,
        )
    else:
        updater = training.updaters.MultiprocessParallelUpdater(
            train_iter, optimizer, devices=args.gpu)
        updater.setup_workers()

    if len(args.gpu) > 0 and args.gpu[0] >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu[0]).use()
        model.to_gpu()

    trainer = training.Trainer(updater, (200, 'epoch'), args.out)
    trainer.extend(DetectionVOCEvaluator(test_iter,
                                         model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=(1000, 'iteration'))

    # build logger
    # make sure to log all data necessary for prediction
    log_interval = 100, 'iteration'
    data_to_log = {
        'image_size': image_size,
        'model_type': args.model,
    }

    # add all command line arguments
    for argument in filter(lambda x: not x.startswith('_'), dir(args)):
        data_to_log[argument] = getattr(args, argument)

    # create callback that logs all auxiliary data the first time things get logged
    def backup_train_config(stats_cpu):
        # FIX: compare against the interval period (int), not the
        # (period, unit) tuple — the old comparison was always False and
        # the auxiliary data was never logged.
        if stats_cpu['iteration'] == log_interval[0]:
            stats_cpu.update(data_to_log)

    trainer.extend(
        extensions.LogReport(trigger=log_interval,
                             postprocess=backup_train_config))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(5000, 'iteration'))

    # Pick the image shown in the bbox visualization: either a user-supplied
    # test image or the first training example (de-normalized by adding the
    # mean back).
    if args.test_image is not None:
        plot_image = train._dataset.load_image(args.test_image,
                                               resize_to=image_size)
    else:
        plot_image, _, _ = train.get_example(0)
        plot_image += train._transform.mean

    bbox_plotter = BBOXPlotter(
        plot_image,
        os.path.join(args.out, 'bboxes'),
        send_bboxes=True,
        upstream_port=args.port,
        upstream_ip=args.ip,
    )
    trainer.extend(bbox_plotter, trigger=(10, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train an SSD detector on PASCAL VOC 2007+2012 trainval.

    Standard ChainerCV SSD recipe: MomentumSGD with step LR decay at
    ``--step`` iterations, bias gradient scaling, weight decay on all other
    parameters, and VOC-2007-test evaluation at each LR step.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Train on the union of VOC2007 and VOC2012 trainval.
    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize)

    test = VOCBboxDataset(year='2007',
                          split='test',
                          use_difficult=True,
                          return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # Per-parameter hooks: biases get gradient scaling, weights get decay.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    # LR decays by 10x at each --step iteration.
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step, 'iteration'))

    # Evaluate at every LR step and at the end of training.
    trainer.extend(DetectionVOCEvaluator(test_iter,
                                         model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
# Top-level script section: optimizer setup with a choice of L1 (Lasso) or
# L2 (WeightDecay) regularization, followed by a mini-batch training loop.
# NOTE(review): relies on module-level names (`net`, `args`, `decay`, `rep`,
# `bs`, `train_len`, `train_datas`, `train_labels`, `np`) defined earlier in
# the script; the loop body appears to continue beyond this chunk (no
# backward/update call is visible here) — confirm against the full file.
optimizer = optimizers.AdaDelta()
optimizer.setup(net)

if args.lasso:
    # Sparsify weights with Lasso (L1) regression
    from chainer.optimizer_hooks import Lasso
    # Apply L1 decay to all parameters except biases.
    for param in net.params():
        if param.name != 'b':
            param.update_rule.add_hook(Lasso(decay))
else:
    # Suppress overfitting with Ridge (L2) regression
    from chainer.optimizer_hooks import WeightDecay
    # Apply L2 decay to all parameters except biases.
    for param in net.params():
        if param.name != 'b':
            param.update_rule.add_hook(WeightDecay(decay))

# Mini-batch training
from chainer import Variable
gx = []
gy = []
for i in range(rep):
    # Fresh random permutation each step; take a batch of size bs,
    # clamped to the dataset length at the tail.
    sffindx = np.random.permutation(train_len)
    x = Variable(
        np.array(train_datas)[sffindx[i:(
            i + bs) if (i + bs) < train_len else train_len]])
    t = Variable(
        np.array(train_labels)[sffindx[i:(
            i + bs) if (i + bs) < train_len else train_len]])
    net.cleargrads()
    loss = net(x, t)
def handler(context):
    """Platform entry point: train SSD on VOC datasets fetched via a dataset API.

    Args:
        context: platform-provided object whose ``datasets`` attribute maps
            dataset aliases ('trainval2007', 'trainval2012', 'test2007') to
            dataset IDs loadable through ``load_dataset_from_api``.

    Side effects: writes logs, TensorBoard events and model snapshots under
    ``ABEJA_TRAINING_RESULT_DIR`` / ``log_path`` (module-level constants).
    """
    dataset_alias = context.datasets
    trainval_2007_dataset_id = dataset_alias['trainval2007']
    trainval_2012_dataset_id = dataset_alias['trainval2012']
    test_2007_dataset_id = dataset_alias['test2007']

    # Materialize all three datasets up front (API returns an iterable).
    trainval_2007_dataset = list(
        load_dataset_from_api(trainval_2007_dataset_id))
    trainval_2012_dataset = list(
        load_dataset_from_api(trainval_2012_dataset_id))
    test_2007_dataset = list(load_dataset_from_api(test_2007_dataset_id))

    # `network_model`, `USE_GPU`, `BATCHSIZE`, `nb_iterations` are
    # module-level configuration defined elsewhere in this file.
    if network_model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif network_model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if USE_GPU >= 0:
        chainer.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()

    trainval_2007 = DetectionDatasetFromAPI(trainval_2007_dataset)
    trainval_2012 = DetectionDatasetFromAPI(trainval_2012_dataset)
    test_2007 = DetectionDatasetFromAPI(test_2007_dataset,
                                        use_difficult=True,
                                        return_difficult=True)

    train = TransformDataset(ConcatenatedDataset(trainval_2007, trainval_2012),
                             Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train, BATCHSIZE)
    test_iter = chainer.iterators.SerialIterator(test_2007, BATCHSIZE,
                                                 repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # Standard SSD recipe: doubled gradient for biases, L2 decay on weights.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=USE_GPU)
    trainer = training.Trainer(updater, (nb_iterations, 'iteration'),
                               out=ABEJA_TRAINING_RESULT_DIR)

    # Hard-coded lr decay schedule at 80k/100k iterations.
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([80000, 100000],
                                                          'iteration'))
    trainer.extend(DetectionVOCEvaluator(test_iter, model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=(10000, 'iteration'))

    log_interval = 100, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)

    print_entries = [
        'iteration', 'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]
    report_entries = [
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]
    trainer.extend(Statistics(report_entries, nb_iterations,
                              obs_key='iteration'), trigger=log_interval)
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(print_entries), trigger=log_interval)

    # Only the final model weights are snapshotted (no mid-run resume points).
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(nb_iterations, 'iteration'))

    trainer.run()
def main():
    """Train a multi-task (detection + segmentation) SSD variant on VOC.

    Command-line driven training loop. Side effects: creates a timestamped
    output directory under ``--out/<experiment>/``, writes logs and
    snapshots there.

    Fixes relative to the previous revision:
    * ``use_07`` was only bound on the ``'voc'`` branch, so running with
      ``--dataset coco --detection`` crashed with ``NameError`` at the
      ``MultitaskEvaluator`` call; it is now bound to ``False`` for coco.
    * An unknown ``--dataset`` value now fails fast with a clear
      ``ValueError`` instead of a later ``NameError`` on ``label_names``.
    * The bare ``raise RuntimeError`` now carries an explanatory message.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('multi_task_300', 'multi_task_512'),
                        default='multi_task_300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--eval_step', type=int, nargs='*',
                        default=[80000, 100000, 120000])
    parser.add_argument('--lr_step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--snap_step', type=int, default=10000)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    # in experiments for real experiment
    parser.add_argument('--resume', type=str)
    parser.add_argument('--detection', action='store_true', default=False)
    parser.add_argument('--segmentation', action='store_true', default=False)
    parser.add_argument('--attention', action='store_true', default=False)
    parser.add_argument('--dataset', default='voc', type=str)
    parser.add_argument('--experiment', type=str, default='final_voc')
    parser.add_argument('--multitask_loss', action='store_true', default=False)
    parser.add_argument('--dynamic_loss', action='store_true', default=False)
    parser.add_argument('--log_interval', type=int, default=10)
    parser.add_argument('--debug', action='store_true', default=False)
    parser.add_argument('--update_split_interval', type=int, default=100)
    parser.add_argument(
        '--loss_split', type=float, default=0.5
    )  # in fact for detection, other task(segmentation) is 1-loss_split
    args = parser.parse_args()

    # Expand the scalar --snap_step into the explicit list of snapshot
    # iterations [snap, 2*snap, ...] up to and including --iteration.
    snap_step = args.snap_step
    args.snap_step = []
    for step in range(snap_step, args.iteration + 1, snap_step):
        args.snap_step.append(step)

    # redefine the output path: result/<experiment>/<timestamp>
    import os
    import time
    args.out = os.path.join(args.out, args.experiment,
                            time.strftime("%Y%m%d_%H%M%S", time.localtime()))

    if args.model == 'multi_task_300':
        model = Multi_task_300(n_fg_class=len(voc_bbox_label_names),
                               pretrained_model='imagenet',
                               detection=args.detection,
                               segmentation=args.segmentation,
                               attention=args.attention)
    elif args.model == 'multi_task_512':
        model = Multi_task_512(n_fg_class=len(voc_bbox_label_names),
                               pretrained_model='imagenet',
                               detection=args.detection,
                               segmentation=args.segmentation,
                               attention=args.attention)
    model.use_preset('evaluate')

    if not (args.segmentation or args.detection):
        raise RuntimeError(
            'at least one of --detection or --segmentation must be given')

    train_chain = MultiboxTrainChain(model, gpu=args.gpu >= 0,
                                     use_multi_task_loss=args.multitask_loss,
                                     loss_split=args.loss_split)
    train_chain.cleargrads()
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        Multi_task_VOC(voc_experiments[args.experiment][args.experiment
                                                        + '_train']),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(
        train, batch_size=args.batchsize)

    test = VOCBboxDataset(year='2007', split='test',
                          use_difficult=True, return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)
    test_mask = VOCSemanticSegmentationDataset(split='val')
    test_mask_iter = chainer.iterators.SerialIterator(
        test_mask, args.batchsize, repeat=False, shuffle=False)

    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # optimizer.add_hook(GradientClipping(0.1))
    # Standard SSD recipe: doubled gradient for biases, L2 decay on weights.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    '''if args.resume: serializers.load_npz(args.resume, trainer)'''
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr),
                   trigger=triggers.ManualScheduleTrigger(args.lr_step,
                                                          'iteration'))

    if args.dataset == 'voc':
        use_07 = True
        label_names = voc_bbox_label_names
    elif args.dataset == 'coco':
        # BUG FIX: use_07 was previously unbound on this branch, so
        # --dataset coco --detection raised NameError below. COCO does not
        # use the VOC-2007 11-point metric.
        use_07 = False
        label_names = coco_bbox_label_names
    else:
        # Fail fast instead of hitting a NameError on label_names later.
        raise ValueError('unsupported --dataset: {}'.format(args.dataset))

    if args.detection and not args.debug:
        trainer.extend(MultitaskEvaluator(test_iter, model, args.dataset,
                                          use_07, label_names=label_names),
                       trigger=triggers.ManualScheduleTrigger(
                           args.eval_step + [args.iteration], 'iteration'))
    if args.segmentation and not args.debug:
        trainer.extend(MultitaskEvaluator(test_mask_iter, model,
                                          dataset=args.dataset,
                                          label_names=label_names,
                                          detection=False),
                       trigger=triggers.ManualScheduleTrigger(
                           args.eval_step + [args.iteration], 'iteration'))

    log_interval = args.log_interval, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    # Dynamically rebalance detection/segmentation loss weights.
    if args.segmentation and args.detection and args.dynamic_loss:
        trainer.extend(
            loss_split.LossSplit(trigger=(args.update_split_interval,
                                          'iteration')))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/mask',
        'main/loss/loc', 'main/loss/conf', 'main/loss/split'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(),
                   trigger=triggers.ManualScheduleTrigger(
                       args.snap_step + [args.iteration], 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=triggers.ManualScheduleTrigger(
            args.snap_step + [args.iteration], 'iteration'))

    if args.resume:
        # A path containing 'model' is treated as bare model weights;
        # anything else is a full trainer snapshot.
        if 'model' in args.resume:
            serializers.load_npz(args.resume, model)
        else:
            serializers.load_npz(args.resume, trainer)
    print(args)
    trainer.run()
def main():
    """Multi-node (ChainerMN) SSD training on VOC; evaluation/logging on rank 0.

    Each process trains on a scattered shard of the dataset; the dataset is
    split once on rank 0 and distributed via ``chainermn.scatter_dataset``.
    Side effects: writes logs and snapshots under ``--out`` (rank 0 only).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--model', choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    # One GPU per process, indexed by the process's rank within its node.
    device = comm.intra_rank

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder, model.insize, model.mean))
    # Rank 0 builds the full index list; every rank receives its shard.
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # http://chainermn.readthedocs.io/en/latest/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
    # Global batch size is divided evenly across processes.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize // comm.size, n_processes=2)

    if comm.rank == 0:
        test = VOCBboxDataset(year='2007', split='test',
                              use_difficult=True, return_difficult=True)
        test_iter = chainer.iterators.SerialIterator(
            test, args.test_batchsize, repeat=False, shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    # Standard SSD recipe: doubled gradient for biases, L2 decay on weights.
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter, optimizer,
                                                device=device)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(args.step,
                                                          'iteration'))

    # Evaluation, logging and snapshotting only run on rank 0.
    if comm.rank == 0:
        trainer.extend(DetectionVOCEvaluator(
            test_iter, model, use_07_metric=True,
            label_names=voc_bbox_label_names),
            trigger=triggers.ManualScheduleTrigger(
                args.step + [args.iteration], 'iteration'))

        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map']), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(),
                       trigger=triggers.ManualScheduleTrigger(
                           args.step + [args.iteration], 'iteration'))
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
            trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Run 5-fold CV training of a CNN on ECFP fingerprints + protein sequences + node2vec embeddings.

    For each of five CV folds, loads fingerprints, interactions, sequence
    tensors and pickled node2vec models, builds a TupleDataset, trains an
    ``MV.CNN`` with MomentumSGD + weight decay, and writes logs/plots and
    per-epoch model snapshots under ``--output``.

    NOTE(review): ``plensize``, ``MV`` and the various chainer names are
    module-level imports/constants defined elsewhere in this file.
    """
    START = time.time()
    import argparse as arg
    parser = arg.ArgumentParser(description='ecfpWD_n2v')
    parser.add_argument('--gpu', '-g', type=int, default=1, help='GPU ID')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='minibatch')
    parser.add_argument('--epoch', '-e', type=int, default=150,
                        help='number of max iteration to evaluate')
    # s*/sa*: stride parameters, j*/ja*: filter sizes, pf*: filter counts
    # for the three conv stages of MV.CNN — presumably; confirm against MV.
    parser.add_argument('--s1', type=int, default=1)
    parser.add_argument('--sa1', type=int, default=1)
    parser.add_argument('--s2', type=int, default=1)
    parser.add_argument('--sa2', type=int, default=1)
    parser.add_argument('--s3', type=int, default=1)
    parser.add_argument('--sa3', type=int, default=1)
    parser.add_argument('--j1', type=int, default=33)
    parser.add_argument('--pf1', type=int, default=64)
    parser.add_argument('--ja1', type=int, default=17)
    parser.add_argument('--j2', type=int, default=23)
    parser.add_argument('--pf2', type=int, default=64)
    parser.add_argument('--ja2', type=int, default=11)
    parser.add_argument('--j3', type=int, default=33)
    parser.add_argument('--pf3', type=int, default=32)
    parser.add_argument('--ja3', type=int, default=17)
    parser.add_argument('--n_hid3', type=int, default=70)
    parser.add_argument('--n_hid4', type=int, default=80)
    parser.add_argument('--n_hid5', type=int, default=60)
    parser.add_argument('--n_out', type=int, default=1)
    parser.add_argument('--prosize', type=int, default=5762)
    parser.add_argument('--input', '-i', default='./dataset/hard_dataset')
    parser.add_argument('--output', '-o', default='./result/hard_dataset')
    parser.add_argument('--frequency', type=int, default=1)
    # NOTE(review): args=[] ignores the real command line and always uses the
    # defaults — notebook-style invocation; confirm this is intended.
    args = parser.parse_args(args=[])
    print(args.gpu)
    print('GPU: ', args.gpu)
    print('# Minibatch-size: ', args.batchsize)
    print('')
    #-------------------------------
    # GPU check.  xp stays numpy here; the cupy switch is commented out, so
    # all array loading below runs on the CPU regardless of --gpu.
    xp = np
    if args.gpu >= 0:
        print('GPU mode')
        #xp = cp
    #-------------------------------
    # Loading SMILEs — one iteration per CV fold.
    for i in range(5):
        #i = i+4
        print('Making Training dataset...')
        # ECFP fingerprints: one 1024-bit vector per compound.
        ecfp = xp.load(args.input+'/cv_'+str(i)+'/train_fingerprint.npy')
        ecfp = xp.asarray(ecfp, dtype='float32').reshape(-1,1024)
        file_interactions=xp.load(args.input+'/cv_'+str(i)+'/train_interaction.npy')
        print('Loading labels: train_interaction.npy')
        cID = xp.load(args.input+'/cv_'+str(i)+'/train_chemIDs.npy')
        print('Loading chemIDs: train_chemIDs.npy')
        with open(args.input+'/cv_'+str(i)+'/train_proIDs.txt') as f:
            pID = [s.strip() for s in f.readlines()]
        print('Loading proIDs: train_proIDs.txt')
        # Look up node2vec embeddings for each chemical/protein ID from the
        # pickled gensim-style models (wv = word-vector interface).
        n2v_c, n2v_p = [], []
        with open('./data_multi/modelpp.pickle', mode='rb') as f:
            modelpp = pickle.load(f)
        with open('./data_multi/modelcc.pickle', mode='rb') as f:
            modelcc = pickle.load(f)
        for j in cID:
            n2v_c.append(modelcc.wv[str(j)])
        for k in pID:
            n2v_p.append(modelpp.wv[k])
        interactions = xp.asarray(file_interactions, dtype='int32').reshape(-1,args.n_out)
        # 128-dimensional embeddings per entity.
        n2vc = np.asarray(n2v_c, dtype='float32').reshape(-1,128)
        n2vp = np.asarray(n2v_p, dtype='float32').reshape(-1,128)
        #reset memory
        del n2v_c, n2v_p, cID, pID, modelcc, modelpp, file_interactions
        gc.collect()
        file_sequences=xp.load(args.input+'/cv_'+str(i)+'/train_reprotein.npy')
        print('Loading sequences: train_reprotein.npy', flush=True)
        # Protein sequences as (N, 1, prosize, plensize) image-like tensors.
        sequences = xp.asarray(file_sequences, dtype='float32').reshape(-1,1,args.prosize,plensize)
        # reset memory
        del file_sequences
        gc.collect()
        print(interactions.shape, ecfp.shape, sequences.shape, n2vc.shape, n2vp.shape, flush=True)
        print('Now concatenating...', flush=True)
        train_dataset = datasets.TupleDataset(ecfp, sequences, n2vc, n2vp, interactions)
        # Sequential 80/20 train/validation split (no shuffling before the
        # split — presumably the data are already in random order; verify).
        n = int(0.8 * len(train_dataset))
        train_dataset, valid_dataset = train_dataset[:n], train_dataset[n:]
        print('train: ', len(train_dataset), flush=True)
        print('valid: ', len(valid_dataset), flush=True)
        print('pattern: ', i, flush=True)
        output_dir = args.output+'/'+'ecfpN2vc_mSGD'+'/'+'pattern'+str(i)
        # NOTE(review): raises FileExistsError on re-runs; consider
        # exist_ok=True if overwriting is acceptable.
        os.makedirs(output_dir)
        #-------------------------------
        #reset memory again
        del n, sequences, interactions, ecfp, n2vc, n2vp
        gc.collect()
        #-------------------------------
        # Set up a neural network to train
        print('Set up a neural network to train', flush=True)
        model = MV.CNN(args.prosize, plensize, args.batchsize, args.s1,
                       args.sa1, args.s2, args.sa2, args.s3, args.sa3,
                       args.j1, args.pf1, args.ja1, args.j2, args.pf2,
                       args.ja2, args.j3, args.pf3, args.ja3, args.n_hid3,
                       args.n_hid4, args.n_hid5, args.n_out)
        #-------------------------------
        # Make a specified GPU current
        if args.gpu >= 0:
            chainer.cuda.get_device_from_id(args.gpu).use()
            model.to_gpu()  # Copy the model to the GPU
        #-------------------------------
        # Setup an optimizer
        optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
        optimizer.setup(model)
        #-------------------------------
        # L2 regularization(weight decay)
        for param in model.params():
            if param.name != 'b':  # for every non-bias parameter
                param.update_rule.add_hook(WeightDecay(0.00001))  # apply weight decay
        #-------------------------------
        # Set up a trainer
        print('Trainer is setting up...', flush=True)
        train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size= args.batchsize, shuffle=True)
        test_iter = chainer.iterators.SerialIterator(valid_dataset, batch_size= args.batchsize, repeat=False, shuffle=True)
        updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
        trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=output_dir)
        # Evaluate the model with the test dataset for each epoch
        trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
        # Take a snapshot for each specified epoch
        trainer.extend(extensions.snapshot_object(model, 'model_snapshot_{.updater.epoch}'), trigger=(args.frequency,'epoch'))
        # Write a log of evaluation statistics for each epoch
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch'), log_name='log_epoch'))
        trainer.extend(extensions.LogReport(trigger=(10, 'iteration'), log_name='log_iteration'))
        # Print selected entries of the log to stdout
        trainer.extend(extensions.PrintReport(
            ['epoch', 'elapsed_time','main/loss', 'validation/main/loss','main/accuracy','validation/main/accuracy']))
        # Print some results
        trainer.extend(extensions.PlotReport(['main/loss', 'validation/main/loss'], x_key='epoch', file_name='loss.png'))
        trainer.extend(extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'], x_key='epoch', file_name='accuracy.png'))
        # Print a progress bar to stdout
        trainer.extend(extensions.ProgressBar())
        # Run the training
        trainer.run()
        END = time.time()
        print('Nice, your Learning Job is done. Total time is {} sec.'.format(END-START))
        # Free per-fold objects before the next fold's large allocations.
        del model, train_iter, test_iter, updater, trainer
        gc.collect()