def main():
    """Command-line entry point: train a ``ParallelMLP`` classifier on MNIST.

    Reads the options below from the command line, echoes the chosen
    settings, wires the model, optimizer, iterators and trainer together,
    optionally restores trainer state from ``--resume``, and finally runs the
    training loop.
    """
    cli = argparse.ArgumentParser(description='Chainer example: MNIST')
    # (flags, keyword arguments) for every option, in help-output order.
    option_table = (
        (('--batchsize', '-b'),
         {'type': int, 'default': 100,
          'help': 'Number of images in each mini-batch'}),
        (('--epoch', '-e'),
         {'type': int, 'default': 20,
          'help': 'Number of sweeps over the dataset to train'}),
        (('--gpu0', '-g'),
         {'type': int, 'default': 0, 'help': 'First GPU ID'}),
        (('--gpu1', '-G'),
         {'type': int, 'default': 1, 'help': 'Second GPU ID'}),
        (('--out', '-o'),
         {'default': 'result_parallel',
          'help': 'Directory to output the result'}),
        (('--resume', '-r'),
         {'default': '', 'help': 'Resume the training from snapshot'}),
        (('--unit', '-u'),
         {'type': int, 'default': 1000, 'help': 'Number of units'}),
    )
    for flags, spec in option_table:
        cli.add_argument(*flags, **spec)
    args = cli.parse_args()

    # Echo the effective configuration, followed by a blank line.
    banner = (
        'GPU: {}, {}'.format(args.gpu0, args.gpu1),
        '# unit: {}'.format(args.unit),
        '# Minibatch-size: {}'.format(args.batchsize),
        '# epoch: {}'.format(args.epoch),
        '',
    )
    for line in banner:
        print(line)

    # The predictor is handed both GPU IDs; gpu0 is then made current.
    net = L.Classifier(ParallelMLP(args.unit, 10, args.gpu0, args.gpu1))
    chainer.backends.cuda.get_device_from_id(args.gpu0).use()

    adam = chainer.optimizers.Adam()
    adam.setup(net)

    train_set, test_set = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train_set, args.batchsize)
    # Evaluation makes a single, unshuffled pass over the test set.
    test_iter = chainer.iterators.SerialIterator(
        test_set, args.batchsize, repeat=False, shuffle=False)

    updater = training.updaters.StandardUpdater(
        train_iter, adam, device=args.gpu0)
    run_length = (args.epoch, 'epoch')
    trainer = training.Trainer(updater, run_length, out=args.out)

    report_keys = [
        'epoch',
        'main/loss',
        'validation/main/loss',
        'main/accuracy',
        'validation/main/accuracy',
        'elapsed_time',
    ]
    trainer.extend(extensions.Evaluator(test_iter, net, device=args.gpu0))
    trainer.extend(extensions.dump_graph('main/loss'))
    # The snapshot trigger equals the total training length.
    trainer.extend(extensions.snapshot(), trigger=run_length)
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(report_keys))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Restore the whole trainer state from the given snapshot file.
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train an MLP document classifier on vectors produced by ``corpus``.

    Loads the dictionary and the articles through the ``corpus`` module,
    turns every article into a feature vector and a class id, splits the
    samples 50/50 into train and test sets, and trains a three-argument
    ``MLP`` wrapped in ``L.Classifier`` on the CPU for 20 epochs, writing
    results to the ``result`` directory.
    """
    # Load the dictionary.
    dictionary = corpus.get_dictionary(create_flg=False)
    # Load the articles (mapping of file name -> content).
    contents = corpus.get_contents()

    # Feature extraction: one vector and one class id per article.
    features = []
    labels = []
    for file_name, content in contents.items():
        features.append(corpus.get_vector(dictionary, content))
        labels.append(corpus.get_class_id(file_name))

    data_train_s, data_test_s, label_train_s, label_test_s = train_test_split(
        features, labels, test_size=0.5)

    in_units = len(data_train_s[0])  # input-layer units (vocabulary size)
    n_units = 1000                   # hidden-layer units
    n_label = 9                      # output-layer units

    # Model definition.
    model = L.Classifier(MLP(in_units, n_units, n_label))

    # Set up an optimizer.
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Convert the Python lists to arrays of the dtypes used for training.
    np_data_train_s = np.array(data_train_s, dtype=np.float32)
    np_label_train_s = np.array(label_train_s, dtype=np.int32)
    np_data_test_s = np.array(data_test_s, dtype=np.float32)
    np_label_test_s = np.array(label_test_s, dtype=np.int32)

    train_iter = chainer.iterators.SerialIterator(
        tuple_dataset.TupleDataset(np_data_train_s, np_label_train_s), 100)
    # Evaluation makes a single, unshuffled pass over the test samples.
    test_iter = chainer.iterators.SerialIterator(
        tuple_dataset.TupleDataset(np_data_test_s, np_label_test_s), 100,
        repeat=False, shuffle=False)

    # device=-1 selects the CPU.
    updater = training.StandardUpdater(train_iter, optimizer, device=-1)
    trainer = training.Trainer(updater, (20, 'epoch'), out='result')

    trainer.extend(extensions.Evaluator(test_iter, model, device=-1))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot())
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))
    trainer.extend(extensions.ProgressBar())

    trainer.run()
def main():
    """Parse CLI options and train a ``ParallelMLP`` MNIST classifier.

    The predictor is built with an input size of 784, ``--unit`` hidden
    units, 10 outputs and the two GPU IDs. Training runs for ``--epoch``
    epochs and writes its results to ``--out``; ``--resume`` restores the
    trainer from a snapshot before training starts.
    """
    arg_parser = argparse.ArgumentParser(description="Chainer example: MNIST")
    add_option = arg_parser.add_argument
    add_option("--batchsize", "-b", type=int, default=100,
               help="Number of images in each mini batch")
    add_option("--epoch", "-e", default=20, type=int,
               help="Number of sweeps over the dataset to train")
    add_option("--gpu0", "-g", default=0, type=int, help="First GPU ID")
    add_option("--gpu1", "-G", default=1, type=int, help="Second GPU ID")
    add_option("--out", "-o", default="result_parallel",
               help="Directory to output the result")
    add_option("--resume", "-r", default="",
               help="Resume the training from snapshot")
    add_option("--unit", "-u", default=1000, type=int,
               help="Number of units")
    args = arg_parser.parse_args()

    first_gpu = args.gpu0
    second_gpu = args.gpu1

    # Show the effective settings, then an empty separator line.
    print("GPU: {}, {}".format(first_gpu, second_gpu))
    print("# unit: {}".format(args.unit))
    print("# Minibatch-size: {}".format(args.batchsize))
    print("# epoch: {}".format(args.epoch))
    print("")

    predictor = ParallelMLP(784, args.unit, 10, first_gpu, second_gpu)
    classifier = L.Classifier(predictor)
    # Make the first GPU the current device.
    chainer.cuda.get_device(first_gpu).use()

    adam = chainer.optimizers.Adam()
    adam.setup(classifier)

    mnist_train, mnist_test = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(mnist_train, args.batchsize)
    # One ordered pass over the test data per evaluation.
    test_iter = chainer.iterators.SerialIterator(
        mnist_test, args.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, adam, device=first_gpu)
    trainer = training.Trainer(updater, (args.epoch, "epoch"), out=args.out)

    columns = [
        "epoch",
        "main/loss",
        "validation/main/loss",
        "main/accuracy",
        "validation/main/accuracy",
    ]
    trainer.extend(
        extensions.Evaluator(test_iter, classifier, device=first_gpu))
    trainer.extend(extensions.dump_graph("main/loss"))
    trainer.extend(extensions.snapshot())
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(columns))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Continue from a previously saved trainer snapshot.
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train a two-argument ``MLP`` classifier on MNIST with SGD on the CPU.

    Uses mini-batches of 100, trains for 500 epochs, writes results to the
    ``result_`` directory and takes a trainer snapshot every 100 iterations.
    """
    train, test = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train, 100)
    # Evaluation makes a single, unshuffled pass over the test set.
    test_iter = chainer.iterators.SerialIterator(
        test, 100, repeat=False, shuffle=False)

    model = L.Classifier(MLP(784, 10))
    optimizer = chainer.optimizers.SGD()
    optimizer.setup(model)

    # device=-1 selects the CPU.
    updater = training.StandardUpdater(train_iter, optimizer, device=-1)
    trainer = training.Trainer(updater, (500, 'epoch'), out='result_')

    trainer.extend(extensions.Evaluator(test_iter, model, device=-1))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(100, 'iteration'))
    trainer.extend(extensions.LogReport())
    # The last key used to be misspelled ('validation/main/acuracy'), which
    # matches no reported value, so that column was never filled in.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))
    trainer.extend(extensions.ProgressBar())

    trainer.run()
def run(datasetPath, resultPath, modelPath="", resumePath=""):
    """Train an ``Alex`` classifier on a VOC dataset and save snapshots.

    Args:
        datasetPath: A single path (``str``, handled by ``datasetVOC``) or a
            ``list`` of paths (handled by ``datasetVOCs``).
        resultPath: Output directory passed to the trainer.
        modelPath: Optional model file; loaded into the model only when it
            names an existing file.
        resumePath: Optional trainer snapshot; loaded only when it names an
            existing file.

    Raises:
        Exception: If ``datasetPath`` is neither a ``str`` nor a ``list``.
    """
    # Pick the dataset loader from the argument type.
    if isinstance(datasetPath, str):
        make_dataset = datasetVOC
    elif isinstance(datasetPath, list):
        make_dataset = datasetVOCs
    else:
        raise Exception("データセットパスの型が不正です。")
    # NOTE(review): the meaning of the literal 32 is not visible here --
    # presumably an image size; confirm against datasetVOC/datasetVOCs.
    train_set, test_set = make_dataset(datasetPath, 32).getDataset()

    # Build the model, optionally starting from saved weights.
    net = chainer.links.Classifier(Alex())
    if os.path.isfile(modelPath):
        chainer.serializers.load_npz(modelPath, net)

    adam = chainer.optimizers.Adam()
    adam.setup(net)

    # A copy of the model with its ``train`` attribute cleared is used for
    # evaluation.
    eval_net = net.copy()
    eval_net.train = False

    train_iter = chainer.iterators.SerialIterator(train_set, BATCH_SIZE)
    test_iter = chainer.iterators.SerialIterator(
        test_set, BATCH_SIZE, repeat=False, shuffle=False)

    updater = chainer.training.StandardUpdater(train_iter, adam, device=-1)
    trainer = chainer.training.Trainer(
        updater, (EPOCH, "epoch"), out=resultPath)

    columns = [
        "epoch",
        "main/loss",
        "validation/main/loss",
        "main/accuracy",
        "validation/main/accuracy",
    ]
    trainer.extend(extensions.Evaluator(test_iter, eval_net, device=-1))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(columns))
    trainer.extend(extensions.ProgressBar(update_interval=5))
    # File names embed the epoch number taken from the updater.
    trainer.extend(
        extensions.snapshot(filename="snapshot_epoch_{.updater.epoch}"))
    trainer.extend(
        extensions.snapshot_object(
            net, filename="model_epoch_{.updater.epoch}"))
    trainer.extend(extensions.dump_graph("main/loss"))

    if os.path.isfile(resumePath):
        chainer.serializers.load_npz(resumePath, trainer)

    trainer.run()
def main():
    """Train the MNIST classifier chosen with ``--model``.

    The model class is looked up in the module-level ``models`` mapping and
    wrapped in ``L.Classifier``. Loss and accuracy curves are written as
    ``loss.png`` and ``accuracy.png`` in the output directory.
    """
    cli = argparse.ArgumentParser(description='MNIST tutorial')
    # (flags, keyword arguments) for every option, in help-output order.
    option_table = (
        (('--model',), {'default': 'SLP', 'help': 'Model to use'}),
        (('--batchsize', '-b'),
         {'type': int, 'default': 100,
          'help': 'Number of images in each mini-batch'}),
        (('--epoch', '-e'),
         {'type': int, 'default': 20,
          'help': 'Number of sweeps over the dataset to train'}),
        (('--out', '-o'),
         {'default': 'result', 'help': 'Directory to output the result'}),
        (('--gpu', '-g'),
         {'type': int, 'default': -1, 'help': 'GPU to use'}),
    )
    for flags, spec in option_table:
        cli.add_argument(*flags, **spec)
    args = cli.parse_args()

    net = L.Classifier(models[args.model]())
    # A non-negative ID selects a GPU; the model is copied onto it.
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        net.to_gpu()

    adam = chainer.optimizers.Adam()
    adam.setup(net)

    train_set, test_set = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train_set, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test_set, args.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, adam, device=args.gpu)
    run_length = (args.epoch, 'epoch')
    trainer = training.Trainer(updater, run_length, out=args.out)

    loss_keys = ['main/loss', 'validation/main/loss']
    accuracy_keys = ['main/accuracy', 'validation/main/accuracy']
    printed_keys = ['epoch', 'main/loss', 'validation/main/loss',
                    'main/accuracy', 'validation/main/accuracy',
                    'elapsed_time']

    trainer.extend(extensions.Evaluator(test_iter, net, device=args.gpu))
    trainer.extend(extensions.dump_graph('main/loss'))
    # The snapshot trigger equals the total training length.
    trainer.extend(extensions.snapshot(), trigger=run_length)
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PlotReport(loss_keys, 'epoch', file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(
            accuracy_keys, 'epoch', file_name='accuracy.png'))
    trainer.extend(extensions.PrintReport(printed_keys))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
def main():
    """Train Faster R-CNN (VGG16) on PASCAL VOC and evaluate its mAP.

    Training data is VOC2007 trainval, optionally concatenated with VOC2012
    trainval; evaluation always uses the VOC2007 test split. The learning
    rate is multiplied by 0.1 every ``--step_size`` iterations, and the
    detector is evaluated at ``--step_size`` and ``--iteration``.
    """
    cli = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    add_option = cli.add_argument
    add_option('--dataset', choices=('voc07', 'voc0712'),
               help='The dataset to use: VOC07, VOC07+12',
               default='voc07')
    add_option('--gpu', '-g', type=int, default=-1)
    add_option('--lr', '-l', type=float, default=1e-3)
    add_option('--out', '-o', default='result', help='Output directory')
    add_option('--seed', '-s', type=int, default=0)
    add_option('--step_size', '-ss', type=int, default=50000)
    add_option('--iteration', '-i', type=int, default=70000)
    args = cli.parse_args()

    np.random.seed(args.seed)

    # Training data; `choices` above restricts --dataset to these two values.
    if args.dataset == 'voc07':
        train_data = VOCBboxDataset(split='trainval', year='2007')
    elif args.dataset == 'voc0712':
        voc07_trainval = VOCBboxDataset(year='2007', split='trainval')
        voc12_trainval = VOCBboxDataset(year='2012', split='trainval')
        train_data = ConcatenatedDataset(voc07_trainval, voc12_trainval)
    test_data = VOCBboxDataset(
        split='test', year='2007',
        use_difficult=True, return_difficult=True)

    detector = FasterRCNNVGG16(
        n_fg_class=len(voc_bbox_label_names), pretrained_model='imagenet')
    detector.use_preset('evaluate')
    train_chain = FasterRCNNTrainChain(detector)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        train_chain.to_gpu()

    sgd = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    sgd.setup(train_chain)
    sgd.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Apply the training transform lazily to every sample.
    train_data = TransformDataset(train_data, Transform(detector))
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, batch_size=1, n_processes=None, shared_mem=100000000)
    test_iter = chainer.iterators.SerialIterator(
        test_data, batch_size=1, repeat=False, shuffle=False)

    updater = chainer.training.updater.StandardUpdater(
        train_iter, sgd, device=args.gpu)
    total_iterations = (args.iteration, 'iteration')
    trainer = training.Trainer(updater, total_iterations, out=args.out)

    # Save only the detector, once, when the iteration budget is reached.
    trainer.extend(
        extensions.snapshot_object(
            train_chain.faster_rcnn, 'snapshot_model.npz'),
        trigger=total_iterations)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1),
        trigger=(args.step_size, 'iteration'))

    log_interval = (20, 'iteration')
    plot_interval = (3000, 'iteration')
    print_interval = (20, 'iteration')

    printed_keys = [
        'iteration', 'epoch', 'elapsed_time', 'lr',
        'main/loss',
        'main/roi_loc_loss',
        'main/roi_cls_loss',
        'main/rpn_loc_loss',
        'main/rpn_cls_loss',
        'validation/main/map',
    ]
    trainer.extend(
        chainer.training.extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(
        extensions.PrintReport(printed_keys), trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Plotting is registered only when PlotReport reports itself available.
    if extensions.PlotReport.available():
        loss_plot = extensions.PlotReport(
            ['main/loss'], file_name='loss.png', trigger=plot_interval)
        trainer.extend(loss_plot, trigger=plot_interval)

    voc_evaluator = DetectionVOCEvaluator(
        test_iter, train_chain.faster_rcnn, use_07_metric=True,
        label_names=voc_bbox_label_names)
    trainer.extend(
        voc_evaluator,
        trigger=ManualScheduleTrigger(
            [args.step_size, args.iteration], 'iteration'))

    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train an ImageNet convnet on several GPUs, one process per device.

    The training set is split randomly into one shard per GPU listed in
    ``--gpus``; each shard gets its own ``MultiprocessIterator`` and all of
    them are driven by ``MultiprocessParallelUpdater``. Validation runs on
    the first listed GPU.
    """
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN
    }

    cli = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    add_option = cli.add_argument
    add_option('train', help='Path to training image-label list file')
    add_option('val', help='Path to validation image-label list file')
    add_option('--arch', '-a', choices=archs.keys(), default='nin',
               help='Convnet architecture')
    add_option('--batchsize', '-B', type=int, default=32,
               help='Learning minibatch size')
    add_option('--epoch', '-E', type=int, default=10,
               help='Number of epochs to train')
    add_option('--gpus', '-g', type=int, nargs="*",
               default=[0, 1, 2, 3])
    add_option('--initmodel',
               help='Initialize the model from given file')
    add_option('--loaderjob', '-j', type=int,
               help='Number of parallel data loading processes')
    add_option('--mean', '-m', default='mean.npy',
               help='Mean file (computed by compute_mean.py)')
    add_option('--resume', '-r', default='',
               help='Initialize the trainer from given file')
    add_option('--out', '-o', default='result',
               help='Output directory')
    add_option('--root', '-R', default='.',
               help='Root directory path of image files')
    add_option('--val_batchsize', '-b', type=int, default=250,
               help='Validation minibatch size')
    add_option('--test', action='store_true')
    cli.set_defaults(test=False)
    args = cli.parse_args()

    # Instantiate the network to train, optionally from saved weights.
    net = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, net)

    # Datasets share the mean image and the network's input size.
    mean_image = np.load(args.mean)
    train_set = train_imagenet.PreprocessedDataset(
        args.train, args.root, mean_image, net.insize)
    val_set = train_imagenet.PreprocessedDataset(
        args.val, args.root, mean_image, net.insize, False)

    # One training iterator per device; images are loaded by subprocesses
    # running in parallel to training/validation.
    devices = tuple(args.gpus)
    shards = chainer.datasets.split_dataset_n_random(train_set, len(devices))
    train_iters = []
    for shard in shards:
        train_iters.append(
            chainer.iterators.MultiprocessIterator(
                shard, args.batchsize, n_processes=args.loaderjob))
    val_iter = chainer.iterators.MultiprocessIterator(
        val_set, args.val_batchsize, repeat=False,
        n_processes=args.loaderjob)

    sgd = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    sgd.setup(net)

    updater = updaters.MultiprocessParallelUpdater(
        train_iters, sgd, devices=devices)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # --test switches validation and logging to short, epoch-based intervals.
    if args.test:
        val_interval, log_interval = (5, 'epoch'), (1, 'epoch')
    else:
        val_interval, log_interval = (100000, 'iteration'), (1000, 'iteration')

    evaluator = train_imagenet.TestModeEvaluator(
        val_iter, net, device=args.gpus[0])
    trainer.extend(evaluator, trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(
        extensions.snapshot_object(net, 'model_iter_{.updater.iteration}'),
        trigger=val_interval)
    # The interval goes to LogReport itself: it decides when the log is
    # emitted rather than when observations are read.
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    printed_keys = [
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]
    trainer.extend(extensions.PrintReport(printed_keys), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=2))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train a ``ModernMLP`` classifier on MNIST with RMSprop.

    Command-line options select the batch size, epoch count, GPU (``-1``
    means CPU), snapshot frequency in epochs (``-1`` means a single snapshot
    at the final epoch) and an optional trainer snapshot to resume from.
    No output directory is passed to the trainer, so its default is used.
    """
    parser = argparse.ArgumentParser(description='Chainer-Tutorial: MLP')
    parser.add_argument('--batch_size', '-b', type=int, default=128,
                        help='Number of samples in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of times to train on data set')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID: -1 indicates CPU')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    # Load MNIST.
    # http://docs.chainer.org/en/latest/reference/datasets.html
    train, test = chainer.datasets.get_mnist()

    train_iter = chainer.iterators.SerialIterator(train, args.batch_size)
    # Evaluation makes a single, unshuffled pass over the test set.
    test_iter = chainer.iterators.SerialIterator(
        test, args.batch_size, repeat=False, shuffle=False)

    # The classifier's loss function is left at its default.
    # 625 is the hidden-layer width and 10 the output dimension.
    model = L.Classifier(ModernMLP(625, 10))

    # args.gpu doubles as the "use a GPU?" condition and as the device ID.
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    # RMSprop with its default hyper-parameters.
    # http://docs.chainer.org/en/latest/reference/optimizers.html
    optimizer = chainer.optimizers.RMSprop()
    optimizer.setup(model)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'))

    # Evaluate the model on the test set.
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump the computational graph of the 'main/loss' variable; "main" is
    # the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    report_params = [
        'epoch',
        'main/loss',
        'validation/main/loss',
        'main/accuracy',
        'validation/main/accuracy',
        'elapsed_time'
    ]
    # LogReport is registered exactly once. It used to be added a second
    # time further down, which created a duplicate extension writing the
    # same log.
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(report_params))
    trainer.extend(extensions.ProgressBar())

    # Snapshot every `frequency` epochs; -1 means once, at the last epoch.
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Save loss and accuracy plots to the result directory when possible.
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'validation/main/loss'],
                'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    if args.resume:
        # Resume from a snapshot saved in NPZ format.
        # http://docs.chainer.org/en/latest/reference/serializers.html
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train an ImageNet convnet across multiple workers with ChainerMN.

    Each process uses the GPU whose ID equals its intra-node rank. Only the
    rank-0 worker loads the datasets; they are then scattered to all
    workers. Reporting extensions are registered on rank 0 only.

    Raises:
        RuntimeError: If ``chainer.cuda.available`` is false.
    """
    # The ImageNet example does not support CPU execution.
    if not chainer.cuda.available:
        raise RuntimeError("ImageNet requires GPU support.")

    archs = {
        'alex': alex.Alex,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
    }

    cli = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    add_option = cli.add_argument
    add_option('train', help='Path to training image-label list file')
    add_option('val', help='Path to validation image-label list file')
    add_option('--arch', '-a', choices=archs.keys(), default='nin',
               help='Convnet architecture')
    add_option('--batchsize', '-B', type=int, default=32,
               help='Learning minibatch size')
    add_option('--epoch', '-E', type=int, default=10,
               help='Number of epochs to train')
    add_option('--initmodel',
               help='Initialize the model from given file')
    add_option('--loaderjob', '-j', type=int,
               help='Number of parallel data loading processes')
    add_option('--mean', '-m', default='mean.npy',
               help='Mean file (computed by compute_mean.py)')
    add_option('--resume', '-r', default='',
               help='Initialize the trainer from given file')
    add_option('--out', '-o', default='result',
               help='Output directory')
    add_option('--root', '-R', default='.',
               help='Root directory path of image files')
    add_option('--val_batchsize', '-b', type=int, default=250,
               help='Validation minibatch size')
    add_option('--test', action='store_true')
    add_option('--communicator', default='hierarchical')
    cli.set_defaults(test=False)
    args = cli.parse_args()

    # Prepare the ChainerMN communicator; the intra-node rank is the GPU ID.
    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank
    is_root = comm.rank == 0

    if is_root:
        rule = '=========================================='
        summary = (
            rule,
            'Num process (COMM_WORLD): {}'.format(comm.size),
            'Using {} communicator'.format(args.communicator),
            'Using {} arch'.format(args.arch),
            'Num Minibatch-size: {}'.format(args.batchsize),
            'Num epoch: {}'.format(args.epoch),
            rule,
        )
        for line in summary:
            print(line)

    net = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, net)

    chainer.cuda.get_device_from_id(device).use()  # Make the GPU current
    net.to_gpu()

    # Only worker 0 loads the whole dataset; scatter_dataset then splits it
    # and distributes the parts to all workers.
    mean_image = np.load(args.mean)
    if is_root:
        train_set = PreprocessedDataset(
            args.train, args.root, mean_image, net.insize)
        val_set = PreprocessedDataset(
            args.val, args.root, mean_image, net.insize, False)
    else:
        train_set = None
        val_set = None
    train_set = chainermn.scatter_dataset(train_set, comm, shuffle=True)
    val_set = chainermn.scatter_dataset(val_set, comm)

    # The multiprocessing start method is changed because processes often
    # crash when calling fork while using InfiniBand together with
    # MultiprocessIterator.
    # (c.f., https://www.open-mpi.org/faq/?category=tuning#fork-warning )
    multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train_set, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val_set, args.val_batchsize, repeat=False,
        n_processes=args.loaderjob)

    # Wrap a standard Chainer optimizer into a multi-node optimizer.
    base_optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer = chainermn.create_multi_node_optimizer(base_optimizer, comm)
    optimizer.setup(net)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # All three intervals share one value: short in --test mode, else 1 epoch.
    interval = (10, 'iteration') if args.test else (1, 'epoch')
    checkpoint_interval = interval
    val_interval = interval
    log_interval = interval

    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    # Wrap a plain evaluator into a multi-node evaluator.
    local_evaluator = TestModeEvaluator(val_iter, net, device=device)
    evaluator = chainermn.create_multi_node_evaluator(local_evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)

    # Display and output extensions are needed on one worker only;
    # otherwise every worker would repeat the same output.
    if is_root:
        printed_keys = [
            'epoch', 'iteration', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'lr'
        ]
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.PrintReport(printed_keys), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train the line-drawing colorization GAN and save the final model.

    Builds the ``unet.UNET`` generator, the ``unet.DIS`` discriminator and
    the ``lnet.LNET`` network, trains them with ``ganUpdater`` on the image
    pairs found under ``--dataset``, and finally writes ``model_final`` and
    ``optimizer_final`` into ``--out``.

    If ``--resume`` is given, the trainer snapshot is restored before
    training starts.
    """
    parser = argparse.ArgumentParser(
        description='chainer line drawing colorization')
    parser.add_argument('--batchsize', '-b', type=int, default=2,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=6,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--dataset', '-i', default='./images/',
                        help='Directory of image files.')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random seed')
    parser.add_argument('--snapshot_interval', type=int, default=10000,
                        help='Interval of snapshot')
    parser.add_argument('--display_interval', type=int, default=3,
                        help='Interval of displaying log to console')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    root = args.dataset

    cnn = unet.UNET()
    dis = unet.DIS()
    l = lnet.LNET()

    # The line and color sub-directories are appended to the dataset root
    # by plain string concatenation, so the root needs a trailing slash.
    dataset = Image2ImageDataset(
        "dat/images_color_train.dat", root + "line/", root + "color/",
        train=True)
    train_iter = chainer.iterators.SerialIterator(dataset, args.batchsize)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make the GPU current
        # Copy all three networks to the GPU.
        cnn.to_gpu()
        dis.to_gpu()
        l.to_gpu()

    # One Adam optimizer per trained network, each with weight decay.
    opt = optimizers.Adam(alpha=0.0001)
    opt.setup(cnn)
    opt.add_hook(chainer.optimizer.WeightDecay(1e-5), 'hook_cnn')

    opt_d = chainer.optimizers.Adam(alpha=0.0001)
    opt_d.setup(dis)
    opt_d.add_hook(chainer.optimizer.WeightDecay(1e-5), 'hook_dec')

    updater = ganUpdater(
        models=(cnn, dis, l),
        iterator={'main': train_iter},
        optimizer={'cnn': opt, 'dis': opt_d},
        device=args.gpu)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Model snapshots every N iterations, full trainer snapshots every 2N.
    snapshot_interval = (args.snapshot_interval, 'iteration')
    snapshot_interval2 = (args.snapshot_interval * 2, 'iteration')
    trainer.extend(extensions.dump_graph('cnn/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval2)
    trainer.extend(extensions.snapshot_object(
        cnn, 'cnn_128_iter_{.updater.iteration}'), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        dis, 'cnn_128_dis_iter_{.updater.iteration}'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(opt, 'optimizer_'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=(10, 'iteration'), ))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'cnn/loss', 'cnn/loss_rec', 'cnn/loss_adv',
            'cnn/loss_tag', 'cnn/loss_l', 'dis/loss'
        ]))
    trainer.extend(extensions.ProgressBar(update_interval=20))

    # Restore the snapshot BEFORE training. This used to happen after
    # trainer.run(), so --resume never affected training and instead
    # overwrote the freshly trained state right before the final save.
    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()

    # Save the trained generator and its optimizer.
    out_dir = args.out
    chainer.serializers.save_npz(os.path.join(out_dir, 'model_final'), cnn)
    chainer.serializers.save_npz(
        os.path.join(out_dir, 'optimizer_final'), opt)
def main():
    """Train a three-class MLP on 32x32 grayscale images listed in a CSV.

    Reads ``csv/images-data.csv`` (columns ``file`` and ``label``), converts
    every image to a ``(1, 1, 32, 32)`` float array scaled to ``[0, 1]``,
    and trains an ``MLP`` classifier using the same samples for training and
    evaluation. Afterwards the model and optimizer are saved under
    ``model/`` and the prediction for the second sample is printed.

    Raises:
        IOError: If an image listed in the CSV cannot be read.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=1,
                        help='Number of images in each mini batch')
    parser.add_argument('--epoch', '-e', type=int, default=40,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=10,
                        help='Number of units')
    args = parser.parse_args()

    # Input size: one value per pixel of a 32x32 image.
    n_in = 32 * 32

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    model = L.Classifier(MLP(n_in, args.unit, 3))

    # Set up an optimizer.
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the dataset described by the CSV file.
    table = pd.read_csv('csv/images-data.csv')
    samples = []
    for path, label in zip(table['file'], table['label']):
        # Single-argument print() gives the same output on Python 2 and 3.
        print('{} {}'.format(path, label))

        # cv2.imread signals failure by returning None instead of raising.
        img = cv2.imread(path, cv2.IMREAD_COLOR)
        if img is None:
            raise IOError('cannot read image: {}'.format(path))

        # Color -> grayscale.
        # NOTE(review): cv2.imread yields BGR channel order, so
        # COLOR_BGR2GRAY may be what was intended; left unchanged so inputs
        # stay identical to those of previously trained models -- confirm.
        imggray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)

        # Flatten row by row into float32, shape it as a one-image batch
        # with one channel, and scale the pixel values to [0, 1].
        imgdata = imggray.astype('f').reshape(1, 1, 32, 32)
        imgdata = imgdata / 255.0

        samples.append((imgdata, np.array(label, dtype=np.int32)))

    # The same samples serve as both training and test data.
    train, test = samples, samples
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    # Set up a trainer.
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset.
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump the computational graph of the 'main/loss' variable; "main" is
    # the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take trainer snapshots.
    trainer.extend(extensions.snapshot())

    # Write a log of evaluation statistics.
    trainer.extend(extensions.LogReport())

    # Print selected log entries to stdout. "validation" is the default
    # name of the Evaluator extension.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))

    # Print a progress bar to stdout.
    trainer.extend(extensions.ProgressBar())

    # Run the training.
    trainer.run()

    # Save the model and the optimizer state.
    serializers.save_npz('model/simple-3layer-perceptron.model', model)
    serializers.save_npz('model/simple-3layer-perceptron.state', optimizer)

    # Predict the class of the second sample and print the raw scores
    # followed by the index of the largest one.
    xx = Variable(np.array([samples[1][0], ]), volatile=True)
    y = model.predictor(xx)
    print(y.data)
    print(np.argmax(y.data))
def main():
    """Train an MNIST MLP classifier with data-parallel updates on two GPUs.

    The command line selects the two GPU ids, the batch size, the number of
    epochs, the hidden unit count, the output directory and an optional
    trainer snapshot to resume from.
    """
    cli = argparse.ArgumentParser(description='Chainer example: MNIST')
    cli.add_argument('--batchsize', '-b', type=int, default=400,
                     help='Number of images in each mini-batch')
    cli.add_argument('--epoch', '-e', type=int, default=20,
                     help='Number of sweeps over the dataset to train')
    cli.add_argument('--gpu0', '-g', type=int, default=0,
                     help='First GPU ID')
    cli.add_argument('--gpu1', '-G', type=int, default=1,
                     help='Second GPU ID')
    cli.add_argument('--out', '-o', default='result_parallel',
                     help='Directory to output the result')
    cli.add_argument('--resume', '-r', default='',
                     help='Resume the training from snapshot')
    cli.add_argument('--unit', '-u', type=int, default=1000,
                     help='Number of units')
    opts = cli.parse_args()

    # Echo the effective configuration.
    banner = (
        'GPU: {}, {}'.format(opts.gpu0, opts.gpu1),
        '# unit: {}'.format(opts.unit),
        '# Minibatch-size: {}'.format(opts.batchsize),
        '# epoch: {}'.format(opts.epoch),
        '',
    )
    for line in banner:
        print(line)

    # Make the first GPU the current device before building the model.
    chainer.cuda.get_device(opts.gpu0).use()

    classifier = L.Classifier(train_mnist.MLP(opts.unit, 10))
    adam = chainer.optimizers.Adam()
    adam.setup(classifier)

    train_set, test_set = chainer.datasets.get_mnist()
    train_batches = chainer.iterators.SerialIterator(
        train_set, opts.batchsize)
    test_batches = chainer.iterators.SerialIterator(
        test_set, opts.batchsize, repeat=False, shuffle=False)

    # The device named 'main' is passed as the first GPU; the name of the
    # other entry is arbitrary.
    gpu_assignment = {'main': opts.gpu0, 'second': opts.gpu1}
    updater = training.ParallelUpdater(
        train_batches,
        adam,
        devices=gpu_assignment,
    )
    stop_after = (opts.epoch, 'epoch')
    trainer = training.Trainer(updater, stop_after, out=opts.out)

    evaluator = extensions.Evaluator(
        test_batches, classifier, device=opts.gpu0)
    trainer.extend(evaluator)
    trainer.extend(extensions.dump_graph('main/loss'))
    # Snapshot trigger equals the total epoch count.
    trainer.extend(extensions.snapshot(), trigger=(opts.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    report_columns = ['epoch', 'main/loss', 'validation/main/loss',
                      'main/accuracy', 'validation/main/accuracy']
    trainer.extend(extensions.PrintReport(report_columns))
    trainer.extend(extensions.ProgressBar())

    snapshot_path = opts.resume
    if snapshot_path:
        chainer.serializers.load_npz(snapshot_path, trainer)

    trainer.run()
def main():
    """Train a LeNets model on MNIST, optionally with center loss.

    Settings are logged through ``setup_logger``.  Besides the usual
    evaluation, logging and plotting extensions, a ``VisualizeDeepFeature``
    extension is run every epoch on the first 10000 training samples.
    """
    cli = argparse.ArgumentParser(description='Chainer example: MNIST')
    cli.add_argument('--batchsize', '-b', type=int, default=32,
                     help='Number of images in each mini-batch')
    cli.add_argument('--epoch', '-e', type=int, default=30,
                     help='Number of sweeps over the dataset to train')
    cli.add_argument('--centerloss', '-c', action='store_true',
                     default=False, help='Use center loss')
    cli.add_argument('--alpha_ratio', '-a', type=float, default=0.5,
                     help='alpha ratio')
    cli.add_argument('--lambda_ratio', '-l', type=float, default=0.1,
                     help='lambda ratio')
    cli.add_argument('--frequency', '-f', type=int, default=-1,
                     help='Frequency of taking a snapshot')
    cli.add_argument('--gpu', '-g', type=int, default=-1,
                     help='GPU ID (negative value indicates CPU)')
    cli.add_argument('--out', '-o', default='result',
                     help='Directory to output the result')
    cli.add_argument('--resume', '-r', default='',
                     help='Resume the training from snapshot')
    opts = cli.parse_args()

    log = setup_logger(__name__)
    log.info("GPU: {}".format(opts.gpu))
    log.info("# Minibatch-size: {}".format(opts.batchsize))
    log.info("# epoch: {}".format(opts.epoch))
    log.info("Calculate center loss: {}".format(opts.centerloss))
    if opts.centerloss:
        # These two ratios are only reported when center loss is enabled.
        log.info('# alpha: {}'.format(opts.alpha_ratio))
        log.info('# lambda: {}'.format(opts.lambda_ratio))

    n_classes = 10
    net = LeNets(
        out_dim=n_classes,
        alpha_ratio=opts.alpha_ratio,
        lambda_ratio=opts.lambda_ratio,
        is_center_loss=opts.centerloss,
    )
    use_gpu = opts.gpu >= 0
    if use_gpu:
        # Select the requested GPU, then move the parameters onto it.
        chainer.cuda.get_device_from_id(opts.gpu).use()
        net.to_gpu()

    adam = chainer.optimizers.Adam()
    adam.setup(net)

    # MNIST is loaded with ndim=3.
    train_set, test_set = chainer.datasets.get_mnist(ndim=3)
    train_batches = chainer.iterators.MultiprocessIterator(
        train_set, opts.batchsize, n_processes=4)
    test_batches = chainer.iterators.MultiprocessIterator(
        test_set, opts.batchsize, n_processes=4,
        repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_batches, adam, device=opts.gpu)
    trainer = training.Trainer(updater, (opts.epoch, 'epoch'), out=opts.out)

    # Evaluation on the test iterator.
    trainer.extend(extensions.Evaluator(test_batches, net, device=opts.gpu))

    # Computational graph rooted at the 'main/loss' variable.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Snapshot period: the whole run when -1 is given, otherwise at least 1.
    if opts.frequency == -1:
        snapshot_period = opts.epoch
    else:
        snapshot_period = max(1, opts.frequency)
    trainer.extend(extensions.snapshot(),
                   trigger=(snapshot_period, 'epoch'))

    trainer.extend(extensions.LogReport())

    # Loss and accuracy curves, when PlotReport reports itself available.
    if extensions.PlotReport.available():
        loss_plot = extensions.PlotReport(
            ['main/loss', 'validation/main/loss'],
            'epoch', file_name='loss.png')
        trainer.extend(loss_plot)
        accuracy_plot = extensions.PlotReport(
            ['main/accuracy', 'validation/main/accuracy'],
            'epoch', file_name='accuracy.png')
        trainer.extend(accuracy_plot)

    # Columns printed to stdout from the log.
    columns = [
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
    ]
    trainer.extend(extensions.PrintReport(columns))

    # Deep-feature visualisation once per epoch.
    feature_view = VisualizeDeepFeature(
        train_set[:10000], n_classes, opts.centerloss)
    trainer.extend(feature_view, trigger=(1, 'epoch'))

    trainer.extend(extensions.ProgressBar())

    if opts.resume:
        # Restore the trainer state from the given snapshot file.
        chainer.serializers.load_npz(opts.resume, trainer)

    trainer.run()
def main():
    """Train an MNIST MLP classifier and export the trained weights.

    A snapshot is taken every epoch.  After training, both the bare MLP and
    the wrapping classifier are serialized into the output directory as
    ``mlp.model`` and ``clf.model``.  No progress bar is registered.
    """
    cli = argparse.ArgumentParser(description='Chainer example: MNIST')
    cli.add_argument('--batchsize', '-b', type=int, default=100,
                     help='Number of images in each mini-batch')
    cli.add_argument('--epoch', '-e', type=int, default=20,
                     help='Number of sweeps over the dataset to train')
    cli.add_argument('--gpu', '-g', type=int, default=-1,
                     help='GPU ID (negative value indicates CPU)')
    cli.add_argument('--out', '-o', default='result_u100',
                     help='Directory to output the result')
    cli.add_argument('--resume', '-r', default='',
                     help='Resume the training from snapshot')
    cli.add_argument('--unit', '-u', type=int, default=100,
                     help='Number of units')
    opts = cli.parse_args()

    for line in ('GPU: {}'.format(opts.gpu),
                 '# unit: {}'.format(opts.unit),
                 '# Minibatch-size: {}'.format(opts.batchsize),
                 '# epoch: {}'.format(opts.epoch),
                 ''):
        print(line)

    # The bare network is kept separately so it can be saved on its own.
    net = mlp.MLP(opts.unit, 10)
    classifier = L.Classifier(net)
    if opts.gpu >= 0:
        device = chainer.cuda.get_device_from_id(opts.gpu)
        device.use()
        classifier.to_gpu()

    adam = chainer.optimizers.Adam()
    adam.setup(classifier)

    train_set, test_set = chainer.datasets.get_mnist()
    train_batches = chainer.iterators.SerialIterator(
        train_set, opts.batchsize)
    test_batches = chainer.iterators.SerialIterator(
        test_set, opts.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_batches, adam, device=opts.gpu)
    trainer = training.Trainer(updater, (opts.epoch, 'epoch'), out=opts.out)

    # Evaluation on the test iterator.
    evaluator = extensions.Evaluator(
        test_batches, classifier, device=opts.gpu)
    trainer.extend(evaluator)

    # Computational graph rooted at the 'main/loss' variable.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Snapshot after every single epoch.
    every_epoch = (1, 'epoch')
    trainer.extend(extensions.snapshot(), trigger=every_epoch)

    trainer.extend(extensions.LogReport())

    # Columns printed to stdout from the log.
    columns = ['epoch', 'main/loss', 'validation/main/loss',
               'main/accuracy', 'validation/main/accuracy', 'elapsed_time']
    trainer.extend(extensions.PrintReport(columns))

    if extensions.PlotReport.available():
        # One figure for the losses, one for the accuracies.
        loss_plot = extensions.PlotReport(
            ['main/loss', 'validation/main/loss'],
            x_key='epoch', file_name='loss.png')
        trainer.extend(loss_plot)
        accuracy_plot = extensions.PlotReport(
            ['main/accuracy', 'validation/main/accuracy'],
            x_key='epoch', file_name='accuracy.png')
        trainer.extend(accuracy_plot)

    if opts.resume:
        # Restore the trainer state from the given snapshot file.
        serializers.load_npz(opts.resume, trainer)

    trainer.run()

    # Persist the bare network and the classifier wrapper separately.
    mlp_path = '{}/mlp.model'.format(opts.out)
    clf_path = '{}/clf.model'.format(opts.out)
    serializers.save_npz(mlp_path, net)
    serializers.save_npz(clf_path, classifier)
def main():
    """Run a training job described by a Python configuration module.

    The positional ``config`` argument names a Python file (the ``.py``
    suffix is optional).  It is imported as a module and must define
    ``batchsize``, ``dataset``, ``epoch``, ``mode``, ``model`` and
    ``optimizer``; ``hook`` and ``extension`` are optional.  A fresh,
    numbered output directory is created under ``--out`` and the
    configuration file is copied into it.  Only GPU execution is accepted:
    a negative ``--gpu`` raises ``ValueError``.
    """
    cli = argparse.ArgumentParser()
    # configuration file
    cli.add_argument('config', type=str)
    # training
    cli.add_argument('--gpu', '-g', type=int, default=0)
    cli.add_argument('--loader_threads', '-l', type=int, default=4)
    cli.add_argument('--out', '-o', default='./result/')
    # util
    cli.add_argument('--wait', type=int)
    args = cli.parse_args()

    # Accept the configuration both with and without its '.py' suffix.
    if args.config.endswith('.py'):
        args.config = args.config[:-3]

    # Pick the first '<prefix>-NN' directory that does not exist yet,
    # starting from the number of already existing matches.
    prefix = os.path.join(args.out, args.config)
    serial = len(glob.glob(prefix + '-*'))
    while True:
        output_dir = prefix + '-' + str(serial).rjust(2, '0')
        try:
            os.makedirs(output_dir)
        except FileExistsError:
            serial += 1
            continue
        break

    # Import the configuration: path separators become package dots.
    config = importlib.import_module(args.config.replace('/', '.'))

    # Keep a copy of the configuration next to the results.
    config_copy = os.path.join(output_dir, 'config.py')
    with open(args.config + '.py', 'r') as src, \
            open(config_copy, 'w') as dst:
        dst.writelines(src)

    # The configuration must provide every required attribute.
    required = ('batchsize', 'dataset', 'epoch',
                'mode', 'model', 'optimizer')
    for name in required:
        assert hasattr(config, name), \
            'Configuration file do not have attribute {}!'.format(name)

    # Optionally block until another process has finished; this polls
    # /proc once per second and therefore only works on Linux.
    pid = args.wait
    if pid is not None:
        proc_entry = '/proc/{}'.format(pid)
        while os.path.exists(proc_entry):
            time.sleep(1)

    # Only GPU execution is accepted: a negative id is rejected.
    gpu = args.gpu
    if gpu < 0:
        raise ValueError('currently, execution on CPU is not supported')
    chainer.cuda.get_device_from_id(gpu).use()
    chainer.cuda.set_max_workspace_size(1024 ** 3)
    chainer.global_config.autotune = True

    # The GPU id is known to be non-negative here, so the model is always
    # moved to the GPU.
    model = config.model
    model.to_gpu()

    # Dataset iterators: multiprocess loading when more than one loader
    # thread is requested, serial loading otherwise.
    train_dataset, val_dataset = config.dataset
    if args.loader_threads <= 1:
        train_iter = chainer.iterators.SerialIterator(
            train_dataset, config.batchsize)
        val_iter = chainer.iterators.SerialIterator(
            val_dataset, config.batchsize, repeat=False)
    else:
        train_iter = chainer.iterators.MultiprocessIterator(
            train_dataset, config.batchsize,
            n_processes=args.loader_threads)
        val_iter = chainer.iterators.MultiprocessIterator(
            val_dataset, config.batchsize,
            repeat=False, n_processes=args.loader_threads)

    # Optimizer plus any optional hooks from the configuration.
    optimizer = config.optimizer
    optimizer.setup(model)
    for hook in getattr(config, 'hook', []):
        optimizer.add_hook(hook)

    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=gpu)
    stop_trigger = (config.epoch, 'epoch')
    trainer = chainer.training.Trainer(updater, stop_trigger, output_dir)

    # Validation runs once per epoch and reports under the 'val' prefix.
    val_interval = (1, 'epoch')
    trainer.extend(extensions.Evaluator(val_iter, model, device=gpu),
                   trigger=val_interval, name='val')
    trainer.extend(extensions.dump_graph('main/loss'))

    # Extra (extension, trigger) pairs supplied by the configuration.
    for extension, trigger in getattr(config, 'extension', []):
        trainer.extend(extension, trigger=trigger)

    # Log file, stdout report and progress bar.
    trainer.extend(extensions.LogReport(trigger=val_interval))
    columns = [
        'elapsed_time', 'epoch', 'main/loss', 'val/main/loss',
        'main/accuracy', 'val/main/accuracy'
    ]
    trainer.extend(extensions.PrintReport(columns), trigger=val_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # Snapshot trigger equals the total epoch count.
    trainer.extend(Snapshot(), trigger=stop_trigger)

    # Every name listed in config.mode is set to True on chainer.config.
    for mode in config.mode:
        setattr(chainer.config, mode, True)

    trainer.run()

    print('Result: ', output_dir, flush=True)
def main():
    """Train a VAE (or VQVAE) on MNIST and periodically dump sample images.

    Besides evaluation, logging and snapshotting, a ``confirm_images``
    extension saves original, reconstructed and randomly sampled images
    into the output directory roughly ten times over the run.
    """
    parser = argparse.ArgumentParser(description='Chainer example: VAE')
    parser.add_argument('--initmodel', '-m', default='',
                        help='Initialize the model from given file')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the optimization from snapshot')
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--epoch', '-e', default=100, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--dim-hidden', '-u', default=500, type=int,
                        help='dimention of hidden layers')
    parser.add_argument('--dimz', '-z', default=20, type=int,
                        help='dimention of encoded vector')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='learning minibatch size')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    parser.add_argument('--vqvae', action='store_true',
                        help='Use VQVAE')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# dim z: {}'.format(args.dimz))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Prepare the model (both variants come from the `net` module)
    if args.vqvae:
        model = net.VQVAE(784, args.dimz, args.dim_hidden)
    else:
        model = net.VAE(784, args.dimz, args.dim_hidden)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam(1e-4)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(5.))

    # Initialize the weights from a file if requested
    if args.initmodel:
        chainer.serializers.load_npz(args.initmodel, model)

    # Load the MNIST dataset without labels
    train, test = chainer.datasets.get_mnist(withlabel=False)
    if args.test:
        # Keep only the first 100 samples of each split for quick tests.
        train, _ = chainer.datasets.split_dataset(train, 100)
        test, _ = chainer.datasets.split_dataset(test, 100)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up an updater. The loss function is passed explicitly through
    # the 'loss_func' option.
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu,
                                       loss_func=model.get_loss_func())

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(
        extensions.Evaluator(test_iter, model, device=args.gpu,
                             eval_func=model.get_loss_func(k=10)))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/rec_loss',
            'validation/main/rec_loss', 'main/other_loss',
            'validation/main/other_loss', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    @chainer.training.make_extension()
    def confirm_images(trainer):
        """Save original, reconstructed and sampled images as figures."""

        def save_images(x, filename):
            # Imported lazily so matplotlib is only needed when this
            # extension actually runs.
            import matplotlib.pyplot as plt
            fig, ax = plt.subplots(3, 3, figsize=(9, 9), dpi=100)
            for ai, xi in zip(ax.flatten(), x):
                ai.imshow(xi.reshape(28, 28))
            fig.savefig(filename)
            plt.close(fig)

        # The images are computed on the CPU; the model is moved back to
        # the GPU at the end when one is in use.
        model.to_cpu()

        train_ind = [1, 3, 5, 10, 2, 0, 13, 15, 17]
        x = chainer.Variable(np.asarray(train[train_ind]))
        with chainer.using_config('train', False), \
                chainer.no_backprop_mode():
            x1 = model(x)
        save_images(
            x.data,
            os.path.join(args.out,
                         '{.updater.iteration}_train'.format(trainer)))
        save_images(
            x1.data,
            os.path.join(
                args.out,
                '{.updater.iteration}_train_reconstructed'.format(trainer)))

        test_ind = [3, 2, 1, 18, 4, 8, 11, 17, 61]
        x = chainer.Variable(np.asarray(test[test_ind]))
        with chainer.using_config('train', False), \
                chainer.no_backprop_mode():
            x1 = model(x)
        save_images(
            x.data,
            os.path.join(args.out,
                         '{.updater.iteration}_test'.format(trainer)))
        save_images(
            x1.data,
            os.path.join(
                args.out,
                '{.updater.iteration}_test_reconstructed'.format(trainer)))

        # draw images from randomly sampled z
        if args.vqvae:
            z = model.sample(size=9)
        else:
            z = chainer.Variable(
                np.random.normal(0, 1, (9, args.dimz)).astype(np.float32))
        x = model.decode(z)
        save_images(
            x.data,
            os.path.join(args.out,
                         '{.updater.iteration}_sampled'.format(trainer)))

        if args.gpu >= 0:
            chainer.cuda.get_device_from_id(args.gpu).use()
            model.to_gpu()

    # Fire about ten times per run. The period is clamped to at least one
    # epoch because `args.epoch // 10` is 0 for runs shorter than ten
    # epochs, which is not a usable trigger period.
    image_period = max(1, args.epoch // 10)
    trainer.extend(confirm_images, trigger=(image_period, 'epoch'))

    # Run the training
    trainer.run()
def main():
    """Train an MNIST MLP and export it as WebDNN graph descriptors.

    After training, the predictor is run once on an example input to build
    a computation graph, which is converted for each WebDNN backend.  Ten
    test samples are additionally written to ``test_samples.json`` in the
    output directory.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=5,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='output',
                        help='Directory to output the graph descriptor and sample test data')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=100,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    os.makedirs(args.out, exist_ok=True)

    # Set up a neural network to train
    model = L.Classifier(MLP(args.unit, 10))
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer; its files go into a sub-directory of the output
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                               out=os.path.join(args.out, 'chainer_model'))

    # Evaluate the model with the test dataset
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from the 'main/loss' variable
    trainer.extend(extensions.dump_graph('main/loss'))

    # Snapshot period: the whole run when -1 is given, otherwise at least 1
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

    # Write a log of evaluation statistics
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()

    # conversion
    print('Transpiling model to WebDNN graph descriptor')

    # The example input below is a NumPy array, so the model has to be on
    # the CPU as well; after GPU training the forward pass would otherwise
    # mix host and device arrays.
    if args.gpu >= 0:
        model.to_cpu()

    # example input (anything ok, (batch_size, 784))
    example_input = numpy.expand_dims(train[0][0], axis=0)
    x = chainer.Variable(example_input)
    y = model.predictor(x)  # run model (without softmax)

    # convert graph to intermediate representation
    graph = ChainerGraphConverter().convert_from_inout_vars([x], [y])

    # Best effort: a backend that fails is reported and then skipped.
    for backend in ["webgpu", "webassembly", "fallback"]:
        try:
            exec_info = generate_descriptor(backend, graph)
            exec_info.save(args.out)
        except Exception as ex:
            print(f"Failed generating descriptor for backend {backend}: {str(ex)}\n")
        else:
            print(f"Backend {backend} ok\n")

    print('Exporting test samples (for demo purpose)')
    test_samples_json = []
    for i in range(10):
        image, label = test[i]
        test_samples_json.append({'x': image.tolist(), 'y': int(label)})
    with open(os.path.join(args.out, 'test_samples.json'), 'w') as f:
        json.dump(test_samples_json, f)
def main():
    """Train a VGG classifier on CIFAR-10 or CIFAR-100 with momentum SGD.

    The learning rate is halved every 25 epochs and weight decay of 5e-4
    is applied.  An unknown ``--dataset`` value raises ``RuntimeError``.
    """
    cli = argparse.ArgumentParser(description='Chainer CIFAR example:')
    cli.add_argument('--dataset', '-d', default='cifar10',
                     help='The dataset to use: cifar10 or cifar100')
    cli.add_argument('--batchsize', '-b', type=int, default=64,
                     help='Number of images in each mini-batch')
    cli.add_argument('--learnrate', '-l', type=float, default=0.05,
                     help='Learning rate for SGD')
    cli.add_argument('--epoch', '-e', type=int, default=300,
                     help='Number of sweeps over the dataset to train')
    cli.add_argument('--gpu', '-g', type=int, default=0,
                     help='GPU ID (negative value indicates CPU)')
    cli.add_argument('--out', '-o', default='result',
                     help='Directory to output the result')
    cli.add_argument('--resume', '-r', default='',
                     help='Resume the training from snapshot')
    opts = cli.parse_args()

    for line in ('GPU: {}'.format(opts.gpu),
                 '# Minibatch-size: {}'.format(opts.batchsize),
                 '# epoch: {}'.format(opts.epoch),
                 ''):
        print(line)

    # Per dataset: announcement, number of classes and loader function.
    catalogue = {
        'cifar10': ('Using CIFAR10 dataset.', 10, get_cifar10),
        'cifar100': ('Using CIFAR100 dataset.', 100, get_cifar100),
    }
    entry = catalogue.get(opts.dataset)
    if entry is None:
        raise RuntimeError('Invalid dataset choice.')
    announcement, class_labels, loader = entry
    print(announcement)
    train_set, test_set = loader()

    classifier = L.Classifier(models.VGG.VGG(class_labels))
    if opts.gpu >= 0:
        # Select the requested GPU, then move the parameters onto it.
        chainer.cuda.get_device_from_id(opts.gpu).use()
        classifier.to_gpu()

    sgd = chainer.optimizers.MomentumSGD(opts.learnrate)
    sgd.setup(classifier)
    sgd.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_batches = chainer.iterators.SerialIterator(
        train_set, opts.batchsize)
    test_batches = chainer.iterators.SerialIterator(
        test_set, opts.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_batches, sgd, device=opts.gpu)
    whole_run = (opts.epoch, 'epoch')
    trainer = training.Trainer(updater, whole_run, out=opts.out)

    # Evaluation on the test iterator.
    evaluator = extensions.Evaluator(
        test_batches, classifier, device=opts.gpu)
    trainer.extend(evaluator)

    # Multiply the learning rate by 0.5 every 25 epochs.
    lr_decay = extensions.ExponentialShift('lr', 0.5)
    trainer.extend(lr_decay, trigger=(25, 'epoch'))

    # Computational graph rooted at the 'main/loss' variable.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Snapshot trigger equals the total epoch count.
    trainer.extend(extensions.snapshot(), trigger=whole_run)

    trainer.extend(extensions.LogReport())

    # Columns printed to stdout from the log.
    columns = [
        'epoch', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
    ]
    trainer.extend(extensions.PrintReport(columns))

    trainer.extend(extensions.ProgressBar())

    if opts.resume:
        # Restore the trainer state from the given snapshot file.
        chainer.serializers.load_npz(opts.resume, trainer)

    trainer.run()
def main():
    """Train a LocalPCN classifier on CIFAR-10 or CIFAR-100.

    The first 45000 training samples are used for training and the rest of
    the training split for validation; the dataset's own test split is not
    used.  Model and optimizer weights can optionally be loaded before
    training and are always saved to ``./results`` afterwards.  An unknown
    ``--dataset`` value raises ``RuntimeError``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--model', '-m', type=str, default=None)
    parser.add_argument('--opt', type=str, default=None)
    parser.add_argument('--epoch', '-e', type=int, default=40)
    parser.add_argument('--looptimes', '-t', type=int, default=5)
    parser.add_argument('--lr', '-l', type=float, default=0.01)
    parser.add_argument('--batch', '-b', type=int, default=128)
    parser.add_argument('--noplot', dest='plot', action='store_false',
                        help='Disable PlotReport extension')
    args = parser.parse_args()

    # Only the training split is needed; validation data is carved out of
    # it below, so the test split returned by the loader is discarded.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, _ = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, _ = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')

    # Set up a neural network to train.
    model = L.Classifier(
        network.LocalPCN(class_labels=class_labels,
                         LoopTimes=args.looptimes))

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = optimizers.NesterovAG(lr=args.lr, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(1e-3))

    # Train on the head of the training split, validate on its tail.
    num_train_samples = 45000
    train_iter = iterators.SerialIterator(train[:num_train_samples],
                                          batch_size=args.batch,
                                          shuffle=True)
    test_iter = iterators.SerialIterator(train[num_train_samples:],
                                         batch_size=args.batch,
                                         repeat=False, shuffle=False)

    # Optionally start from previously saved weights / optimizer state.
    if args.model is not None:
        print("loading model from " + args.model)
        serializers.load_npz(args.model, model)

    if args.opt is not None:
        print("loading opt from " + args.opt)
        serializers.load_npz(args.opt, optimizer)

    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                               out='results')

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(extensions.LogReport(trigger=(10, 'iteration')))
    trainer.extend(extensions.observe_lr(), trigger=(10, 'iteration'))

    # Schedule of a learning rate (LinearShift)
    trainer.extend(
        extensions.LinearShift('lr', (args.lr, args.lr * 0.1),
                               (args.epoch * 0.5, args.epoch * 0.5 + 1)),
        trigger=(1, 'epoch'))

    # Save two plot images to the result dir
    if args.plot and extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    trainer.extend(extensions.PrintReport([
        'epoch', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr', 'elapsed_time'
    ]), trigger=(1, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=1))

    # Plot computation graph
    trainer.extend(extensions.dump_graph('main/loss'))

    # Train
    trainer.run()

    # Save results
    modelname = "./results/model"
    print("saving model to " + modelname)
    serializers.save_npz(modelname, model)

    optimizername = "./results/optimizer"
    print("saving optimizer to " + optimizername)
    serializers.save_npz(optimizername, optimizer)
def main():
    """Train a VGG classifier on CIFAR-10 or CIFAR-100.

    Momentum SGD starts at a learning rate of 0.1 that is halved every 25
    epochs, with weight decay of 5e-4.  Evaluation uses the project's
    ``TestModeEvaluator``.  An unknown ``--dataset`` value raises
    ``RuntimeError``.
    """
    cli = argparse.ArgumentParser(description="Chainer CIFAR example:")
    cli.add_argument("--dataset", "-d", default="cifar10",
                     help="The dataset to use: cifar10 or cifar100")
    cli.add_argument("--batchsize", "-b", type=int, default=128,
                     help="Number of images in each mini-batch")
    cli.add_argument("--epoch", "-e", type=int, default=300,
                     help="Number of sweeps over the dataset to train")
    cli.add_argument("--gpu", "-g", type=int, default=0,
                     help="GPU ID (negative value indicates CPU)")
    cli.add_argument("--out", "-o", default="result",
                     help="Directory to output the result")
    cli.add_argument("--resume", "-r", default="",
                     help="Resume the training from snapshot")
    opts = cli.parse_args()

    summary = [
        "GPU: {}".format(opts.gpu),
        "# Minibatch-size: {}".format(opts.batchsize),
        "# epoch: {}".format(opts.epoch),
        "",
    ]
    for line in summary:
        print(line)

    # Reject unknown dataset names first, then load the selected dataset.
    choice = opts.dataset
    if choice not in ("cifar10", "cifar100"):
        raise RuntimeError("Invalid dataset choice.")
    if choice == "cifar10":
        print("Using CIFAR10 dataset.")
        class_labels = 10
        train_set, test_set = get_cifar10()
    else:
        print("Using CIFAR100 dataset.")
        class_labels = 100
        train_set, test_set = get_cifar100()

    classifier = L.Classifier(models.VGG.VGG(class_labels))
    if opts.gpu >= 0:
        # Select the requested GPU, then move the parameters onto it.
        chainer.cuda.get_device(opts.gpu).use()
        classifier.to_gpu()

    sgd = chainer.optimizers.MomentumSGD(0.1)
    sgd.setup(classifier)
    sgd.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_batches = chainer.iterators.SerialIterator(
        train_set, opts.batchsize)
    test_batches = chainer.iterators.SerialIterator(
        test_set, opts.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_batches, sgd, device=opts.gpu)
    whole_run = (opts.epoch, "epoch")
    trainer = training.Trainer(updater, whole_run, out=opts.out)

    # Evaluation on the test iterator.
    evaluator = TestModeEvaluator(test_batches, classifier, device=opts.gpu)
    trainer.extend(evaluator)

    # Multiply the learning rate by 0.5 every 25 epochs.
    trainer.extend(extensions.ExponentialShift("lr", 0.5),
                   trigger=(25, "epoch"))

    # Computational graph rooted at the 'main/loss' variable.
    trainer.extend(extensions.dump_graph("main/loss"))

    # Snapshot trigger equals the total epoch count.
    trainer.extend(extensions.snapshot(), trigger=whole_run)

    trainer.extend(extensions.LogReport())

    # Columns printed to stdout from the log.
    columns = ["epoch", "main/loss", "validation/main/loss",
               "main/accuracy", "validation/main/accuracy", "elapsed_time"]
    trainer.extend(extensions.PrintReport(columns))

    trainer.extend(extensions.ProgressBar())

    if opts.resume:
        # Restore the trainer state from the given snapshot file.
        chainer.serializers.load_npz(opts.resume, trainer)

    trainer.run()
def main():
    """Train an ILSVRC2012 convnet on several processes using ChainerMN.

    Parses the command line, builds the selected architecture, scatters the
    datasets loaded by rank 0 to every worker, and runs a Chainer trainer
    with a multi-node optimizer, evaluator and checkpointer.

    Raises:
        RuntimeError: if CUDA is not available.
    """
    # This example has no CPU code path, so fail before doing anything else.
    if not chainer.cuda.available:
        raise RuntimeError("ImageNet requires GPU support.")

    archs = {
        'alex': alex.Alex,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    add = parser.add_argument
    add('train', help='Path to training image-label list file')
    add('val', help='Path to validation image-label list file')
    add('--arch', '-a', choices=archs.keys(), default='nin',
        help='Convnet architecture')
    add('--batchsize', '-B', type=int, default=32,
        help='Learning minibatch size')
    add('--epoch', '-E', type=int, default=10,
        help='Number of epochs to train')
    add('--initmodel', help='Initialize the model from given file')
    add('--loaderjob', '-j', type=int,
        help='Number of parallel data loading processes')
    add('--mean', '-m', default='mean.npy',
        help='Mean file (computed by compute_mean.py)')
    add('--resume', '-r', default='',
        help='Initialize the trainer from given file')
    add('--out', '-o', default='result', help='Output directory')
    add('--root', '-R', default='.',
        help='Root directory path of image files')
    add('--val_batchsize', '-b', type=int, default=250,
        help='Validation minibatch size')
    add('--test', action='store_true')
    add('--communicator', default='hierarchical')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # One communicator per process; the intra-node rank doubles as GPU id.
    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank

    if comm.mpi_comm.rank == 0:
        rule = '=========================================='
        banner = (
            rule,
            'Num process (COMM_WORLD): {}'.format(comm.size),
            'Using {} communicator'.format(args.communicator),
            'Using {} arch'.format(args.arch),
            'Num Minibatch-size: {}'.format(args.batchsize),
            'Num epoch: {}'.format(args.epoch),
            rule,
        )
        for line in banner:
            print(line)

    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    # Make this worker's GPU current, then move the parameters onto it.
    chainer.cuda.get_device(device).use()
    model.to_gpu()

    # Only rank 0 reads the image lists; scatter_dataset then hands every
    # worker (rank 0 included) its share.
    mean = np.load(args.mean)
    train = None
    val = None
    if comm.rank == 0:
        train = PreprocessedDataset(args.train, args.root, mean, model.insize)
        val = PreprocessedDataset(
            args.val, args.root, mean, model.insize, False)
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    val = chainermn.scatter_dataset(val, comm)

    # With InfiniBand, processes often crash on fork(), so the loader
    # processes of MultiprocessIterator must be started via 'forkserver'.
    # (c.f., https://www.open-mpi.org/faq/?category=tuning#fork-warning )
    multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Wrap a plain Chainer optimizer into a multi-node one.
    base_optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer = chainermn.create_multi_node_optimizer(base_optimizer, comm)
    optimizer.setup(model)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # --test shortens every schedule to 10 iterations; otherwise checkpoint,
    # validation and logging all happen once per epoch.
    interval = (10, 'iteration') if args.test else (1, 'epoch')
    checkpoint_interval = interval
    val_interval = interval
    log_interval = interval

    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    # Wrap the evaluator into a multi-node one as well.
    local_evaluator = TestModeEvaluator(val_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(local_evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)

    # Reporting extensions run on one worker only to avoid repeated output.
    if comm.rank == 0:
        report_keys = [
            'epoch', 'iteration', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'lr'
        ]
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport(report_keys),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
# Setting up datasets train = VOC(args.train_img_dir, args.train_anno_dir, args.train_list_dir, args.train_list_suffix) valid = VOC(args.valid_img_dir, args.valid_anno_dir, args.valid_list_dir, args.valid_list_suffix) logging.info("train: {}, valid: {}".format(len(train), len(valid))) # Iterator train_iter = iterators.MultiprocessIterator(train, args.batchsize, shared_mem=10000000) valid_iter = iterators.SerialIterator(valid, args.valid_batchsize, repeat=False, shuffle=False) # Updater updater = ParallelUpdater(train_iter, optimizer, devices=devices) trainer = training.Trainer(updater, (args.epoch, "epoch"), out=result_dir) # Extentions trainer.extend(extensions.Evaluator(valid_iter, model, device=devices["main"]), trigger=(args.valid_freq, "epoch")) trainer.extend(extensions.dump_graph("main/rpn_loss_cls", out_name="rpn_loss_cls.dot")) trainer.extend(extensions.dump_graph("main/rpn_loss_bbox", out_name="rpn_loss_bbox.dot")) trainer.extend(extensions.dump_graph("main/loss_cls", out_name="loss_cls.dot")) trainer.extend(extensions.dump_graph("main/loss_bbox", out_name="loss_bbox.dot")) trainer.extend(extensions.snapshot(trigger=(args.snapshot_iter, "iteration"))) trainer.extend(extensions.LogReport(trigger=(args.show_log_iter, "iteration"))) trainer.extend( extensions.PrintReport( [ "epoch", "iteration", "main/rpn_loss_cls", "main/rpn_loss_bbox", "main/loss_cls", "main/loss_bbox", "validation/main/rpn_loss_cls",
def main():
    """Train the shogi CNN classifier from the command line.

    Loads ``--datasize`` samples through ``get_data``, wraps ``net.Model``
    in ``L.Classifier`` and trains it with Adam plus weight decay.
    """
    parser = argparse.ArgumentParser(description='CNN Shogi:')
    add = parser.add_argument
    add('--batchsize', '-b', type=int, default=100,
        help='Number of data in each mini-batch')
    add('--alpha', '-a', type=float, default=0.001,
        help='Alpha parameter of Adam')
    add('--epoch', '-e', type=int, default=20,
        help='Number of sweeps over the dataset to train')
    add('--gpu', '-g', type=int, default=-1,
        help='GPU ID (negative value indicates CPU)')
    add('--out', '-o', default='result',
        help='Directory to output the result')
    add('--resume', '-r', default='',
        help='Resume the training from snapshot')
    add('--datasize', '-d', type=int, default=1000,
        help='Number of data')
    args = parser.parse_args()

    for line in ('GPU: {}'.format(args.gpu),
                 '# Minibatch-size: {}'.format(args.batchsize),
                 '# epoch: {}'.format(args.epoch),
                 ''):
        print(line)

    train, test = get_data(args.datasize)

    model = net.Model()
    classifier = L.Classifier(model)

    # When a GPU is requested, make it current and move the classifier there.
    use_gpu = args.gpu >= 0
    if use_gpu:
        chainer.cuda.get_device(args.gpu).use()
        classifier.to_gpu()

    # Trainer configuration.
    optimizer = chainer.optimizers.Adam(alpha=args.alpha)
    optimizer.setup(classifier)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(TestModeEvaluator(test_iter, classifier, device=args.gpu))
    trainer.extend(extensions.dump_graph('main/loss'))
    # An earlier revision passed trigger=(args.epoch, 'epoch') here; the
    # bare model (not the classifier) is now snapshotted on the extension's
    # default trigger, always under the same file name.
    model_snapshot = extensions.snapshot_object(
        target=model, filename='snapshot')
    trainer.extend(model_snapshot)
    trainer.extend(extensions.LogReport())

    columns = [
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy', 'elapsed_time'
    ]
    trainer.extend(extensions.PrintReport(columns))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train FCIS (ResNet-101) on SBD with ChainerMN data parallelism.

    Every process trains on its own slice of the training set; rank 0
    additionally owns the validation iterator and all reporting, snapshot
    and evaluation extensions.
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: FCIS')
    add = parser.add_argument
    add('--out', '-o', default='result', help='Output directory')
    add('--seed', '-s', type=int, default=0)
    add('--lr', '-l', type=float, default=0.0005,
        help='Default value is for 1 GPU.\n'
             'The learning rate should be multiplied by the number of gpu')
    add('--lr-cooldown-factor', '-lcf', type=float, default=0.1)
    add('--epoch', '-e', type=int, default=42)
    add('--cooldown-epoch', '-ce', type=int, default=28)
    args = parser.parse_args()

    # ChainerMN: the intra-node rank is used as the GPU id.
    comm = chainermn.create_communicator()
    device = comm.intra_rank

    np.random.seed(args.seed)

    # Model
    n_fg_class = len(sbd_instance_segmentation_label_names)
    fcis = FCISResNet101(
        n_fg_class=n_fg_class, pretrained_model='imagenet', iter2=False)
    fcis.use_preset('evaluate')
    model = FCISTrainChain(fcis)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    # Dataset: rank 0 creates the index array, scatter_dataset shuffles and
    # splits it, and every rank then slices the transformed dataset with the
    # indices it received.
    train_dataset = TransformDataset(
        SBDInstanceSegmentationDataset(split='train'),
        ('img', 'mask', 'label', 'bbox', 'scale'),
        Transform(model.fcis))
    indices = np.arange(len(train_dataset)) if comm.rank == 0 else None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train_dataset = train_dataset.slice[indices]
    train_iter = chainer.iterators.SerialIterator(train_dataset, batch_size=1)

    # The validation iterator exists on rank 0 only; it is consumed solely
    # inside the rank-0 branch further down.
    if comm.rank == 0:
        test_dataset = SBDInstanceSegmentationDataset(split='val')
        test_iter = chainer.iterators.SerialIterator(
            test_dataset, batch_size=1, repeat=False, shuffle=False)

    # Optimizer
    sgd = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    optimizer = chainermn.create_multi_node_optimizer(sgd, comm)
    optimizer.setup(model)

    head_conv1 = model.fcis.head.conv1
    head_conv1.W.update_rule.add_hook(GradientScaling(3.0))
    head_conv1.b.update_rule.add_hook(GradientScaling(3.0))
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Freeze every parameter named 'beta' or 'gamma', plus the first two
    # stages of the feature extractor.
    for param in model.params():
        if param.name in ['beta', 'gamma']:
            param.update_rule.enabled = False
    extractor = model.fcis.extractor
    extractor.conv1.disable_update()
    extractor.res2.disable_update()

    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, converter=concat_examples, device=device)
    trainer = chainer.training.Trainer(
        updater, (args.epoch, 'epoch'), out=args.out)

    # Learning-rate schedule: scale 'lr' once the cooldown epoch is reached.
    lr_shift = chainer.training.extensions.ExponentialShift(
        'lr', args.lr_cooldown_factor, init=args.lr)
    trainer.extend(lr_shift, trigger=(args.cooldown_epoch, 'epoch'))

    if comm.rank == 0:
        # Intervals
        log_interval = 100, 'iteration'
        plot_interval = 3000, 'iteration'
        print_interval = 20, 'iteration'

        # Training extensions
        trainer.extend(
            extensions.snapshot_object(
                model.fcis, filename='snapshot_model.npz'),
            trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))

        report_keys = [
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/roi_mask_loss',
            'validation/main/map',
        ]
        trainer.extend(
            extensions.PrintReport(report_keys), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            loss_plot = extensions.PlotReport(
                ['main/loss'], file_name='loss.png', trigger=plot_interval)
            trainer.extend(loss_plot, trigger=plot_interval)

        # Evaluate at the iteration counts len(train_dataset) * epoch for the
        # cooldown epoch and for the final epoch.
        n_local = len(train_dataset)
        eval_points = [n_local * args.cooldown_epoch, n_local * args.epoch]
        trainer.extend(
            InstanceSegmentationVOCEvaluator(
                test_iter, model.fcis,
                iou_thresh=0.5, use_07_metric=True,
                label_names=sbd_instance_segmentation_label_names),
            trigger=ManualScheduleTrigger(eval_points, 'iteration'))

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train ``CifarCNN`` on the mini CIFAR-10 directory dataset.

    The dataset under ``--dataset`` is split at random into a 1000-sample
    part (used for training) and the remainder (used for validation).
    A full trainer snapshot is written every ``--frequency`` epochs
    (every 20 epochs when the option is left at its default), and the model
    alone is saved after every epoch.
    """
    parser = argparse.ArgumentParser(description='Chainer example: CIFAR10')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the mini_cifar to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--dataset', '-d', default='mini_cifar/train',
                        help='Directory for train mini_cifar')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # NOTE(review): CifarCNN is handed to the optimizer directly (it is not
    # wrapped in L.Classifier), so it presumably reports 'loss' and
    # 'accuracy' itself for the report extensions below -- confirm.
    model = CifarCNN(10)
    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the mini CIFAR-10 data and split it into train and val.
    train, val = chainer.datasets.split_dataset_random(
        MyCifarDataset(args.dataset), 1000)
    print('train data : {}'.format(len(train)))
    print('val data : {}'.format(len(val)))

    train_iter = chainer.iterators.SerialIterator(
        train, args.batchsize, repeat=True, shuffle=True)
    val_iter = chainer.iterators.SerialIterator(
        val, args.batchsize, repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the validation split
    trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpu))
    trainer.extend(extensions.dump_graph('main/loss'))

    # --frequency used to be parsed and then ignored (the trigger was a
    # hard-coded 20 epochs).  Honour it when it is positive; keep the old
    # 20-epoch period for the default / non-positive values so that runs
    # without the option behave exactly as before.
    snapshot_period = args.frequency if args.frequency > 0 else 20
    trainer.extend(extensions.snapshot(filename='snapshot_{.updater.epoch}'),
                   trigger=(snapshot_period, 'epoch'))
    trainer.extend(extensions.snapshot_object(model,
                                              'model_{.updater.epoch}'),
                   trigger=(1, 'epoch'))
    trainer.extend(extensions.LogReport())

    # Plots are only produced when PlotReport reports itself as available.
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(extensions.ProgressBar(update_interval=1))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training with the 'train' configuration explicitly enabled.
    with chainer.using_config('train', True):
        trainer.run()
def main():
    """Train a two-process pipelined MLP on MNIST with ChainerMN.

    Rank 0 holds the first half of the network (wrapped in a classifier)
    and iterates the real dataset; rank 1 holds the second half and
    iterates an empty dataset of the same length.

    Raises:
        ValueError: if the job is not launched with exactly two processes.
    """
    parser = argparse.ArgumentParser(
        description='ChainerMN example: pipelined neural network')
    add = parser.add_argument
    add('--batchsize', '-b', type=int, default=100,
        help='Number of images in each mini-batch')
    add('--epoch', '-e', type=int, default=20,
        help='Number of sweeps over the dataset to train')
    add('--gpu', '-g', action='store_true', help='Use GPU')
    add('--out', '-o', default='result',
        help='Directory to output the result')
    add('--unit', '-u', type=int, default=1000, help='Number of units')
    args = parser.parse_args()

    # Prepare the ChainerMN communicator; device -1 selects the CPU.
    if args.gpu:
        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank
    else:
        comm = chainermn.create_communicator('naive')
        device = -1

    if comm.size != 2:
        raise ValueError(
            'This example can only be executed on exactly 2 processes.')

    is_first_stage = comm.rank == 0

    if is_first_stage:
        rule = '=========================================='
        print(rule)
        if args.gpu:
            print('Using GPUs')
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print(rule)

    # Each process owns one stage of the pipeline.
    if comm.rank == 0:
        model = L.Classifier(MLP0(comm, args.unit))
    elif comm.rank == 1:
        model = MLP1(comm, args.unit, 10)

    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Iterate the dataset only on worker 0; worker 1 gets empty stand-ins.
    train, test = chainer.datasets.get_mnist()
    if comm.rank == 1:
        train = chainermn.datasets.create_empty_dataset(train)
        test = chainermn.datasets.create_empty_dataset(test)

    train_iter = chainer.iterators.SerialIterator(
        train, args.batchsize, shuffle=False)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    # Display and output extensions are needed on worker 0 only.
    if is_first_stage:
        columns = ['epoch', 'main/loss', 'validation/main/loss',
                   'main/accuracy', 'validation/main/accuracy',
                   'elapsed_time']
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(extensions.PrintReport(columns))
        trainer.extend(extensions.ProgressBar())

    trainer.run()
def main(args):
    """Train and/or evaluate the architecture selected by ``args.arch``.

    When ``args.test`` is false the model is trained, saved before and after
    training, and the arguments are written to ``args.txt``.  In both modes
    the validation evaluator is then invoked once and its result returned.
    When ``args.test`` is true a confusion matrix is printed and written out
    next to ``args.initmodel``.

    NOTE(review): this function was recovered from a whitespace-collapsed
    source; the nesting of the ``googlenet`` branch and of the final
    ``if not args.test:`` block is inferred -- confirm against the original.

    Args:
        args: parsed options; the attributes read here are arch, finetune,
            ignore, initmodel, gpu, out, batchsize, test, mean, train, val,
            root, loaderjob, val_batchsize, opt, baselr, momentum, epoch,
            gamma, resume and categories.

    Returns:
        The mapping returned by the evaluator, with an added ``'outputdir'``
        entry.
    """
    # Initialize the model to train
    model = models.archs[args.arch]()
    if args.finetune and hasattr(model, 'finetuned_model_path'):
        utils.finetuning.load_param(model.finetuned_model_path, model, args.ignore)
        #model.finetune = True

    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    # Output directory: <out>/<arch>/<timestamp>_bs<batchsize>, except in
    # test mode with an initial model, where the model's directory is reused.
    nowt = datetime.datetime.today()
    outputdir = args.out + '/' + args.arch + '/' + nowt.strftime("%Y%m%d-%H%M") + '_bs' + str(args.batchsize)
    if args.test and args.initmodel is not None:
        outputdir = os.path.dirname(args.initmodel)
    # Load the datasets and mean file
    # A model-provided mean value takes precedence over the --mean file.
    mean = None
    if hasattr(model, 'mean_value'):
        mean = makeMeanImage(model.mean_value)
    else:
        mean = np.load(args.mean)
    assert mean is not None

    train = ppds.PreprocessedDataset(args.train, args.root, mean, model.insize)
    val = ppds.PreprocessedDataset(args.val, args.root, mean, model.insize, False)
    # The training iterator loads images with subprocesses running in
    # parallel to training; validation uses a serial iterator.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, shuffle=False, n_processes=args.loaderjob)
    #val_iter = chainer.iterators.MultiprocessIterator(
    #    val, args.val_batchsize, repeat=False, shuffle=False, n_processes=args.loaderjob)
    val_iter = chainer.iterators.SerialIterator(
        val, args.val_batchsize, repeat=False, shuffle=False)

    # Set up an optimizer
    optimizer = optimizers[args.opt]()
    #if args.opt == 'momentumsgd':
    # Apply lr / momentum only to optimizers that expose those attributes.
    if hasattr(optimizer, 'lr'):
        optimizer.lr = args.baselr
    if hasattr(optimizer, 'momentum'):
        optimizer.momentum = args.momentum
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), outputdir)

    #val_interval = (10 if args.test else int(len(train) / args.batchsize)), 'iteration'
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    snapshot_interval = (10, 'iteration') if args.test else (4, 'epoch')
    log_interval = (10 if args.test else 200), 'iteration'

    # Copy the chain with shared parameters to flip 'train' flag only in test
    eval_model = model.copy()
    eval_model.train = False
    if not args.test:
        val_evaluator = extensions.Evaluator(val_iter, eval_model, device=args.gpu)
    else:
        val_evaluator = utils.EvaluatorPlus(val_iter, eval_model, device=args.gpu)
        # NOTE(review): placed inside the else-branch on the assumption that
        # only EvaluatorPlus uses 'lastname' -- confirm.
        if 'googlenet' in args.arch:
            val_evaluator.lastname = 'validation/main/loss3'
    trainer.extend(val_evaluator, trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=(500, 'iteration'))
    # Be careful to pass the interval directly to LogReport
    # (it determines when to emit log rather than when to read observations)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy',
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
    # Multiply 'lr' by args.gamma every epoch, for momentum SGD only.
    if args.opt == 'momentumsgd':
        trainer.extend(extensions.ExponentialShift('lr', args.gamma), trigger=(1, 'epoch'))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # NOTE(review): training and the model/args dumps are assumed to be
    # skipped entirely in test mode -- confirm the extent of this block.
    if not args.test:
        chainer.serializers.save_npz(outputdir + '/model0', model)
        trainer.run()
        chainer.serializers.save_npz(outputdir + '/model', model)
        with open(outputdir + '/args.txt', 'w') as o:
            print(args, file=o)

    # Run the evaluator once by hand to obtain the metrics to return.
    results = val_evaluator(trainer)
    results['outputdir'] = outputdir

    if args.test:
        print(val_evaluator.confmat)
        categories = utils.io.load_categories(args.categories)
        # The confusion-matrix files are written next to the initial model.
        confmat_csv_name = args.initmodel + '.csv'
        confmat_fig_name = args.initmodel + '.eps'
        utils.io.save_confmat_csv(confmat_csv_name, val_evaluator.confmat, categories)
        utils.io.save_confmat_fig(confmat_fig_name, val_evaluator.confmat, categories,
                                  mode="rate", saveFormat="eps")
    return results
def main():
    """Fine-tune a Caffe-derived AlexNet as a single-output regressor.

    Weights are copied from the pickled ``alexnet.pkl`` model into the
    predictor, the gradients of conv1-conv5 are handed to ``DelGradient``,
    and the network is trained with a mean-squared-error loss.
    """
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--train', default='train.txt', type=str,
                        help='File name of train data')
    parser.add_argument('--test', default='validation.txt', type=str,
                        help='File name of validation data')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--batchsize', '-b', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--mean', default=None,
                        help='mean file (computed by compute_mean.py)')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    train = image_dataset.ImageDataset(args.train, args.root, max_size=128,
                                       mean=args.mean)
    test = image_dataset.ImageDataset(args.test, args.root, max_size=128,
                                      mean=args.mean)

    # Set up a neural network to train.
    # The Classifier wrapper is configured with a mean-squared-error loss
    # and accuracy computation is switched off below, so only the loss is
    # available to the PrintReport extension.
    model = L.Classifier(alexnet.FromCaffeAlexnet(1),
                         lossfun=F.mean_squared_error)

    # Close the pickle file deterministically instead of leaking the handle.
    # NOTE: pickle can execute arbitrary code while loading; 'alexnet.pkl'
    # must come from a trusted source.
    with open('alexnet.pkl', 'rb') as pretrained_file:
        original_model = pickle.load(pretrained_file)
    copy_model(original_model, model.predictor)
    model.compute_accuracy = False

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()  # Make a specified GPU current
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))
    # NOTE(review): DelGradient presumably clears the gradients of the
    # listed convolution layers so that they stay frozen -- confirm.
    optimizer.add_hook(
        DelGradient(["conv1", "conv2", "conv3", "conv4", "conv5"]))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset
    trainer.extend(TestModeEvaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Snapshot the trainer and the model every args.epoch epochs, i.e. once,
    # when the final epoch completes.
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    trainer.extend(
        extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    """Train Faster R-CNN (VGG16) on PASCAL VOC with ChainerMN.

    The learning rate given on the command line is multiplied by the number
    of processes.  Rank 0 keeps the datasets and scatters them to all
    workers; reporting and snapshot extensions run on rank 0 only.
    """
    parser = argparse.ArgumentParser(
        description='ChainerCV training example: Faster R-CNN')
    parser.add_argument('--dataset', choices=('voc07', 'voc0712'),
                        help='The dataset to use: VOC07, VOC07+12',
                        default='voc07')
    parser.add_argument('--gpu', '-g', type=int, default=-1)
    parser.add_argument('--lr', '-l', type=float, default=1e-3)
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--seed', '-s', type=int, default=0)
    parser.add_argument('--step_size', '-ss', type=int, default=50000)
    parser.add_argument('--iteration', '-i', type=int, default=70000)
    args = parser.parse_args()

    np.random.seed(args.seed)

    if args.dataset == 'voc07':
        train_data = VOCBboxDataset(split='trainval', year='2007')
    elif args.dataset == 'voc0712':
        train_data = ConcatenatedDataset(
            VOCBboxDataset(year='2007', split='trainval'),
            VOCBboxDataset(year='2012', split='trainval'))

    # The intra-node rank is used as this process's GPU id.  (A former
    # 'n_node = comm.intra_rank' assignment was dropped: it stored a rank
    # under a node-count name and was never read.)
    comm = chainermn.create_communicator('hierarchical')
    device = comm.intra_rank
    n_gpu = comm.size
    chainer.cuda.get_device_from_id(device).use()

    # Every process uses batch_size=1, so the total batch size equals the
    # number of processes; the learning rate is scaled by that factor.
    total_batch_size = n_gpu
    args.lr = args.lr * total_batch_size

    test_data = VOCBboxDataset(split='test', year='2007',
                               use_difficult=True, return_difficult=True)
    faster_rcnn = FasterRCNNVGG16(n_fg_class=len(voc_bbox_label_names),
                                  pretrained_model='imagenet')
    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    model.to_gpu()

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9), comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(rate=0.0005))

    train_data = TransformDataset(train_data, Transform(faster_rcnn))

    # Only rank 0 keeps the full datasets; scatter_dataset distributes them.
    if comm.rank != 0:
        train_data = None
        test_data = None
    train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True)
    test_data = chainermn.scatter_dataset(test_data, comm)

    train_iter = chainer.iterators.SerialIterator(train_data, batch_size=1)
    test_iter = chainer.iterators.SerialIterator(test_data, batch_size=1,
                                                 repeat=False, shuffle=False)

    updater = chainer.training.updaters.StandardUpdater(train_iter, optimizer,
                                                        device=device)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               out=args.out)

    log_interval = 20, 'iteration'
    plot_interval = 3000, 'iteration'
    print_interval = 20, 'iteration'

    # NOTE(review): the evaluator receives the training chain and a
    # 'device' keyword, whereas the snapshot below uses model.faster_rcnn;
    # verify both against the DetectionVOCEvaluator actually imported here.
    evaluator = DetectionVOCEvaluator(test_iter, model, device=device,
                                      use_07_metric=True,
                                      label_names=voc_bbox_label_names)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator,
                   trigger=ManualScheduleTrigger(
                       [args.step_size, args.iteration], 'iteration'))

    # Multiply 'lr' by 0.1 every args.step_size iterations.
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=(args.step_size, 'iteration'))

    if comm.rank == 0:
        trainer.extend(extensions.snapshot_object(model.faster_rcnn,
                                                  'snapshot_model.npz'),
                       trigger=(args.iteration, 'iteration'))
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr',
            'main/loss',
            'main/roi_loc_loss',
            'main/roi_cls_loss',
            'main/rpn_loc_loss',
            'main/rpn_cls_loss',
            'validation/main/map',
        ]), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        if extensions.PlotReport.available():
            trainer.extend(extensions.PlotReport(['main/loss'],
                                                 file_name='loss.png',
                                                 trigger=plot_interval),
                           trigger=plot_interval)

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train Mask R-CNN on the ARC2017 datasets, optionally with ChainerMN.

    The positional ``dataset`` argument selects the training data.  With
    ``--multi-node`` the GPU is taken from the communicator's
    ``intra_rank``; otherwise ``--gpu`` is used.  Hyper-parameters that are
    not exposed on the command line (seed, learning rate, weight decay,
    step sizes, image sizes, anchor scales, output directory) are stored
    on ``args`` so that they are written out by the params report.

    Raises:
        ValueError: if the dataset, pooling function or base model is not
            one this script can build (``vgg16`` is accepted by the parser
            but has no implementation here).
    """
    parser = argparse.ArgumentParser(
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        'dataset',
        choices=['visible+occlusion', 'synthetic', 'occlusion'],
        help='The dataset.',
    )
    parser.add_argument('--model', '-m',
                        choices=['vgg16', 'resnet50', 'resnet101'],
                        default='resnet50', help='Base model of Mask R-CNN.')
    parser.add_argument('--pooling-func', '-pf',
                        choices=['pooling', 'align', 'resize'],
                        default='align', help='Pooling function.')
    parser.add_argument('--gpu', '-g', type=int, help='GPU id.')
    parser.add_argument('--multi-node', '-mn', action='store_true',
                        help='use multi node')
    parser.add_argument('--mask-loss', default='softmax',
                        choices=contrib.models.MaskRCNN.mask_losses,
                        help='mask loss mode')
    default_max_epoch = (180e3 * 8) / 118287 * 3  # x3
    parser.add_argument('--max-epoch', type=float,
                        default=default_max_epoch, help='epoch')
    args = parser.parse_args()

    if args.multi_node:
        import chainermn
        comm = chainermn.create_communicator('hierarchical')
        device = comm.intra_rank

        args.n_node = comm.inter_size
        args.n_gpu = comm.size
        chainer.cuda.get_device_from_id(device).use()
    else:
        args.n_node = 1
        args.n_gpu = 1
        chainer.cuda.get_device_from_id(args.gpu).use()
        device = args.gpu

    args.seed = 0
    now = datetime.datetime.now()
    args.timestamp = now.isoformat()
    args.out = osp.join(
        here, 'logs/train_mrcnn_lbl', now.strftime('%Y%m%d_%H%M%S'),
    )

    # 0.00125 * 8 = 0.01 in original
    args.batch_size = 1 * args.n_gpu
    args.lr = 0.00125 * args.batch_size
    args.weight_decay = 0.0001

    # lr / 10 at 120k iteration with
    # 160k iteration * 16 batchsize in original
    args.step_size = [(120e3 / 180e3) * args.max_epoch,
                      (160e3 / 180e3) * args.max_epoch]

    random.seed(args.seed)
    np.random.seed(args.seed)

    # Default Config
    args.min_size = 800
    args.max_size = 1333
    args.anchor_scales = (2, 4, 8, 16, 32)

    if args.dataset == 'visible+occlusion':
        train_data1 = contrib.datasets.ARC2017RealInstancesDataset(
            'train', aug='standard')
        train_data1 = MaskRcnnDataset(train_data1, zero_to_unlabeled=True)
        train_data2 = contrib.datasets.ARC2017RealInstancesDataset(
            'test', aug='standard')
        train_data2 = MaskRcnnDataset(train_data2, zero_to_unlabeled=True)
        train_data3 = contrib.datasets.ARC2017OcclusionDataset(
            'train', do_aug=True)
        train_data3 = MaskRcnnDataset(train_data3)
        train_data = chainer.datasets.ConcatenatedDataset(
            train_data1, train_data2, train_data3,
        )
    elif args.dataset == 'synthetic':
        train_data = contrib.datasets.ARC2017SyntheticInstancesDataset(
            do_aug=True, aug_level='all')
        train_data = MaskRcnnDataset(train_data)
    elif args.dataset == 'occlusion':
        train_data = contrib.datasets.ARC2017OcclusionDataset(
            'train', do_aug=True)
        train_data = MaskRcnnDataset(train_data)
    else:
        raise ValueError('Unsupported dataset: {}'.format(args.dataset))

    test_data = contrib.datasets.ARC2017OcclusionDataset('test')
    # The first class name is dropped; the remainder are the foreground
    # classes handed to the model and the evaluators.
    instance_class_names = test_data.class_names[1:]
    test_data_list = test_data.get_video_datasets()
    del test_data
    test_data_list = [MaskRcnnDataset(td) for td in test_data_list]

    if args.pooling_func == 'align':
        pooling_func = cmr.functions.roi_align_2d
    elif args.pooling_func == 'pooling':
        pooling_func = chainer.functions.roi_pooling_2d
    elif args.pooling_func == 'resize':
        pooling_func = cmr.functions.crop_and_resize
    else:
        raise ValueError(
            'Unsupported pooling function: {}'.format(args.pooling_func))

    if args.model in ['resnet50', 'resnet101']:
        # Slice off the prefix explicitly: str.lstrip() removes a set of
        # characters, not a prefix, and only worked here by coincidence.
        n_layers = int(args.model[len('resnet'):])
        mask_rcnn = contrib.models.MaskRCNNResNet(
            n_layers=n_layers,
            n_fg_class=len(instance_class_names),
            pooling_func=pooling_func,
            anchor_scales=args.anchor_scales,
            min_size=args.min_size,
            max_size=args.max_size,
            mask_loss=args.mask_loss,
        )
    else:
        raise ValueError('Unsupported model: {}'.format(args.model))
    model = contrib.models.MaskRCNNTrainChain(mask_rcnn)
    # --gpu has no default, so it may be None; comparing None with an int
    # raises TypeError on Python 3.
    if args.multi_node or (args.gpu is not None and args.gpu >= 0):
        model.to_gpu()

    optimizer = chainer.optimizers.MomentumSGD(lr=args.lr, momentum=0.9)
    if args.multi_node:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.weight_decay))

    # AffineChannel2D links are excluded from parameter updates.
    for link in mask_rcnn.links():
        if isinstance(link, cmr.links.AffineChannel2D):
            link.disable_update()

    train_data = chainer.datasets.TransformDataset(
        train_data, cmr.datasets.MaskRCNNTransform(mask_rcnn))
    test_data_list = [
        chainer.datasets.TransformDataset(
            td, cmr.datasets.MaskRCNNTransform(mask_rcnn, train=False))
        for td in test_data_list
    ]
    test_concat_data = chainer.datasets.ConcatenatedDataset(*test_data_list)
    if args.multi_node:
        # XXX: test data is only used on rank 0, so it is not scattered.
        if comm.rank != 0:
            train_data = None
        train_data = chainermn.scatter_dataset(train_data, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train_data, batch_size=1)
    test_iters = {
        i: chainer.iterators.SerialIterator(
            td, batch_size=1, repeat=False, shuffle=False)
        for i, td in enumerate(test_data_list)
    }
    test_concat_iter = chainer.iterators.SerialIterator(
        test_concat_data, batch_size=1, repeat=False, shuffle=False)

    converter = functools.partial(
        cmr.datasets.concat_examples,
        padding=0,
        # img, bboxes, labels, masks, scales
        indices_concat=[0, 2, 3, 4],  # img, _, labels, masks, scales
        indices_to_device=[0, 1],  # img, bbox
    )
    updater = chainer.training.updater.StandardUpdater(
        train_iter, optimizer, device=device, converter=converter)

    trainer = training.Trainer(
        updater, (args.max_epoch, 'epoch'), out=args.out)

    trainer.extend(
        extensions.ExponentialShift('lr', 0.1),
        trigger=training.triggers.ManualScheduleTrigger(
            args.step_size, 'epoch'))

    eval_interval = 1, 'epoch'
    log_interval = 20, 'iteration'
    plot_interval = 0.1, 'epoch'
    print_interval = 20, 'iteration'

    # Evaluation, snapshots and reporting run on a single process only.
    if not args.multi_node or comm.rank == 0:
        evaluator = contrib.extensions.InstanceSegmentationVOCEvaluator(
            test_iters, model.mask_rcnn, device=device,
            use_07_metric=False, label_names=instance_class_names)
        trainer.extend(evaluator, trigger=eval_interval)
        # Keep the model with the best validation mPQ seen so far.
        trainer.extend(
            extensions.snapshot_object(model.mask_rcnn, 'snapshot_model.npz'),
            trigger=training.triggers.MaxValueTrigger(
                'validation/main/mpq', eval_interval))
        args.git_hash = cmr.utils.git_hash()
        args.hostname = socket.gethostname()
        trainer.extend(fcn.extensions.ParamsReport(args.__dict__))
        trainer.extend(
            contrib.extensions.InstanceSegmentationVisReport(
                test_concat_iter, model.mask_rcnn,
                label_names=instance_class_names),
            trigger=eval_interval)
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(
            extensions.PrintReport([
                'iteration', 'epoch', 'elapsed_time', 'lr',
                'main/loss',
                'main/roi_loc_loss',
                'main/roi_cls_loss',
                'main/roi_mask_loss',
                'main/rpn_loc_loss',
                'main/rpn_cls_loss',
                'validation/main/mpq'
            ]),
            trigger=print_interval,
        )
        trainer.extend(extensions.ProgressBar(update_interval=10))

        # plot
        assert extensions.PlotReport.available()
        trainer.extend(
            extensions.PlotReport(
                [
                    'main/loss',
                    'main/roi_loc_loss',
                    'main/roi_cls_loss',
                    'main/roi_mask_loss',
                    'main/rpn_loc_loss',
                    'main/rpn_cls_loss'
                ],
                file_name='loss.png',
                trigger=plot_interval,
            ),
            trigger=plot_interval,
        )
        trainer.extend(
            extensions.PlotReport([
                'validation/main/map',
                'validation/main/msq',
                'validation/main/mdq',
                'validation/main/mpq'
            ], file_name='accuracy.png', trigger=plot_interval),
            trigger=eval_interval,
        )

        trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()
def main():
    """Train the dual center-proposal pose network on the JSK objects data.

    Parses the command line, builds the classifier around a ResNet-50 based
    predictor, sets up MomentumSGD with weight decay and runs a Chainer
    trainer.  Unless ``--resume`` is given, initial weights are obtained
    through ``download.cache_or_load_file`` from the ``trained_data``
    directory under ``root``.
    """
    parser = argparse.ArgumentParser(
        description='Fully Convolutional Dual Center Pose Proposal Network '
                    'for Pose Estimation')
    parser.add_argument('--batchsize', '-b', type=int, default=1,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=200,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='results/dual_cp',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--snapshot_interval', type=int, default=1000,
                        help='Interval of snapshot')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    # NOTE(review): type=bool treats every non-empty string (including
    # "False") as True; prefer the --train-resnet flag below.
    parser.add_argument('--train_resnet', type=bool, default=False,
                        help='train resnet')
    parser.add_argument('--train-resnet', dest='train_resnet',
                        action='store_true')
    parser.set_defaults(train_resnet=False)
    parser.add_argument('--no-accuracy', dest='compute_acc',
                        action='store_false')
    parser.set_defaults(compute_acc=True)
    parser.add_argument('--no-pose-accuracy', dest='compute_pose_acc',
                        action='store_false')
    parser.set_defaults(compute_pose_acc=True)

    args = parser.parse_args()

    compute_class_accuracy = args.compute_acc
    # Pose accuracy is only computed when class accuracy is enabled too.
    compute_pose_accuracy = args.compute_pose_acc and args.compute_acc

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('# compute class accuracy: {}'.format(compute_class_accuracy))
    print('# compute pose accuracy: {}'.format(compute_pose_accuracy))
    print('')

    im_size = (640, 480)
    objs = np.arange(3) + 1
    n_class = len(objs) + 1

    train_path = os.path.join(os.getcwd(), root,
                              'train_data/JSK_Objects/train')
    bg_path = os.path.join(os.getcwd(), root, 'train_data/MS_COCO/train2017')

    distance_sanity = 0.05
    output_scale = 0.6
    eps = 0.05
    interval = 15

    # using_config() only takes effect inside a ``with`` block; calling it
    # bare was a no-op, so set the global configuration directly.
    chainer.config.cudnn_deterministic = True

    model = DualCPNetClassifier(
        DualCenterProposalNetworkRes50_predict7(
            n_class=n_class, pretrained_model=not args.train_resnet),
        basepath=train_path,
        im_size=im_size,
        distance_sanity=distance_sanity,
        compute_class_accuracy=compute_class_accuracy,
        compute_pose_accuracy=compute_pose_accuracy,
        output_scale=output_scale)

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0001))

    # load train data
    # NOTE(review): the training set is built with mode='test' and the
    # evaluation set with mode='train'; confirm this is intentional.
    train = JSKPoseEstimationAutoContextDataset(
        train_path, objs, bg_path,
        interval=interval,
        iteration_per_epoch=1000,
        mode='test',
        resize_rate=0.5,
        metric_filter=output_scale + eps)

    # load test data
    test = JSKPoseEstimationDataset(
        train_path, objs,
        mode='train',
        interval=interval,
        resize_rate=0.5,
        metric_filter=output_scale + eps)

    # Single-argument print() behaves the same on Python 2 and 3; the
    # double space reproduces the output of the former print statement.
    print('number of train data :  {}'.format(len(train)))
    print('number of test data :  {}'.format(len(test)))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    evaluator = extensions.Evaluator(test_iter, model, device=args.gpu)
    evaluator.default_name = 'val'
    trainer.extend(evaluator)

    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot and snapshot object for each specified epoch
    frequency = args.epoch if args.frequency == -1 else max(1, args.frequency)
    trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))
    trainer.extend(
        extensions.snapshot_object(
            model.predictor,
            filename='model_iteration-{.updater.iteration}'),
        trigger=(frequency, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    trainer.extend(
        extensions.PrintReport([
            'epoch',
            'main/l_cls', 'main/l_cp', 'main/l_ocp',
            'main/cls_acc', 'main/ocp_acc', 'main/rot_acc',
            'val/main/l_cls', 'val/main/l_cp', 'val/main/l_ocp',
            'val/main/cls_acc', 'val/main/ocp_acc', 'val/main/rot_acc',
            'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)
    else:
        npz_name = 'DualCenterProposalNetworkRes50_jsk_class{}.npz'
        caffemodel_name = 'ResNet-50-model.caffemodel'
        path = os.path.join(root, 'trained_data/', npz_name.format(n_class))
        path_caffemodel = os.path.join(root, 'trained_data/',
                                       caffemodel_name)
        print('npz model path : ' + path)
        print('caffe model path : ' + path_caffemodel)
        # First callback creates the npz from the caffemodel, the second
        # loads an existing npz into the model.
        download.cache_or_load_file(
            path,
            lambda npz_path: _make_chainermodel_npz(
                npz_path, path_caffemodel, model, n_class),
            lambda npz_path: serializers.load_npz(npz_path, model))

    # Run the training
    trainer.run()
def main():
    """Train SegNetBasic on CamVid and evaluate it periodically.

    Reads the GPU id, batch size, class-weight file and output directory
    from the command line, trains for a fixed number of iterations with
    MomentumSGD and weight decay, and registers logging, plotting,
    snapshot and semantic-segmentation evaluation extensions.
    """
    cli = argparse.ArgumentParser()
    for flag, kind, fallback in (
            ('--gpu', int, -1),
            ('--batchsize', int, 12),
            ('--class_weight', str, 'class_weight.npy'),
            ('--out', str, 'result')):
        cli.add_argument(flag, type=kind, default=fallback)
    args = cli.parse_args()

    # Schedule: how often to log, validate, and when to stop.
    every_log = (50, 'iteration')
    every_validation = (2000, 'iteration')
    stop_at = (16000, 'iteration')

    # Datasets (only the training split is transformed).
    train_set = TransformDataset(CamVidDataset(split='train'), transform)
    val_set = CamVidDataset(split='val')

    # Iterators
    train_iter = iterators.MultiprocessIterator(train_set, args.batchsize)
    val_iter = iterators.MultiprocessIterator(
        val_set, args.batchsize, shuffle=False, repeat=False)

    # Model wrapped in a class-weighted pixelwise softmax classifier.
    weights = np.load(args.class_weight)
    model = PixelwiseSoftmaxClassifier(
        SegNetBasic(n_class=11), class_weight=weights)
    if args.gpu >= 0:
        # Select the requested GPU, then move the model onto it.
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Optimizer
    sgd = optimizers.MomentumSGD(lr=0.1, momentum=0.9)
    sgd.setup(model)
    sgd.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Updater and trainer
    updater = training.StandardUpdater(train_iter, sgd, device=args.gpu)
    trainer = training.Trainer(updater, stop_at, out=args.out)

    trainer.extend(extensions.LogReport(trigger=every_log))
    trainer.extend(extensions.observe_lr(), trigger=every_log)
    trainer.extend(extensions.dump_graph('main/loss'))

    if extensions.PlotReport.available():
        for series, image_name in (('main/loss', 'loss.png'),
                                   ('validation/main/miou', 'miou.png')):
            trainer.extend(extensions.PlotReport(
                [series], x_key='iteration', file_name=image_name))

    # Save the bare predictor once, when training ends.
    final_snapshot = extensions.snapshot_object(
        model.predictor, filename='model_iteration-{.updater.iteration}')
    trainer.extend(final_snapshot, trigger=stop_at)

    report_columns = [
        'epoch', 'iteration', 'elapsed_time', 'lr',
        'main/loss', 'validation/main/miou',
        'validation/main/mean_class_accuracy',
        'validation/main/pixel_accuracy',
    ]
    trainer.extend(extensions.PrintReport(report_columns), trigger=every_log)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    segmentation_eval = SemanticSegmentationEvaluator(
        val_iter, model.predictor, camvid_label_names)
    trainer.extend(segmentation_eval, trigger=every_validation)

    trainer.run()
# Fragment of a training routine: builds the test iterator and trainer,
# registers reporting extensions, trains, then evaluates on the test
# iterator and saves the model.
test_iter = I.SerialIterator(test, batchsize, repeat=False, shuffle=False)
updater = training.StandardUpdater(train_iter, optimizer)
trainer = training.Trainer(updater, (epoch, 'epoch'))
# Copy of the model taken before training; it is the link evaluated on the
# test iterator after trainer.run().
# NOTE(review): whether the copy shares parameters with `model` depends on
# Link.copy() semantics - confirm.
eval_model = model.copy()
eval_nfp = eval_model.predictor
# Log and validate every 2 iterations.
trainer.extend(E.LogReport(trigger=(2, 'iteration')))
trainer.extend(E.PrintReport(['epoch', 'main/loss', 'main/accuracy',
                              'validation/main/loss',
                              'validation/main/accuracy','elapsed_time']))
trainer.extend(E.PlotReport(['main/loss', 'validation/main/loss'],
                            x_key='epoch', file_name='loss.png'))
trainer.extend(E.PlotReport(['main/accuracy', 'validation/main/accuracy'],
                            x_key='epoch', file_name='accuracy.png'))
# Snapshot extensions are currently disabled:
#trainer.extend(extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'))
#trainer.extend(extensions.snapshot_object(model.predictor, filename='model_epoch-{.updater.epoch}'))
trainer.extend(E.Evaluator(val_iter, model), trigger=(2, 'iteration'))
trainer.extend(E.dump_graph('main/loss'))
trainer.run()
print('test')
# Run the evaluator once, directly, on the test iterator.
evaluator = E.Evaluator(test_iter, eval_model)
result = evaluator()
# NOTE(review): the label says "valid" but the value comes from test_iter.
print('valid accuracy:', float(result['main/accuracy']))
# save model
chainer.serializers.save_npz('model.npz', model)
# Fragment of a training routine: trains with the model's `rpn_train` flag
# set, using MomentumSGD, for 100 epochs on device 0.
# NOTE(review): warmup() is defined elsewhere; its effect is not visible here.
warmup(model, train_iter)
model.rpn_train = True
# Alternative optimizer, currently disabled:
# optimizer = optimizers.Adam()
# optimizer.setup(model)
optimizer = optimizers.MomentumSGD(lr=0.001)
optimizer.setup(model)
optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))
updater = training.StandardUpdater(train_iter, optimizer, device=0)
trainer = training.Trainer(updater, (100, 'epoch'), out='tests/train_test')
# Log, print and plot every 100 iterations.
trainer.extend(extensions.LogReport(trigger=(100, 'iteration')))
trainer.extend(extensions.PrintReport([
    'epoch', 'iteration',
    'main/RPN/rpn_loss',
    'main/RPN/rpn_loss_cls',
    'main/RPN/rpn_cls_accuracy',
    'main/RPN/rpn_loss_bbox',
    'elapsed_time',
]), trigger=(100, 'iteration'))
# Snapshot the whole model every 1000 iterations.
trainer.extend(
    extensions.snapshot_object(model, 'snapshot_{.updater.iteration}'),
    trigger=(1000, 'iteration'))
trainer.extend(extensions.PlotReport(['main/RPN/rpn_loss'],
                                     trigger=(100, 'iteration')))
trainer.extend(
    extensions.dump_graph('main/RPN/rpn_loss', out_name='rpn_loss.dot'))
trainer.run()
# Fragment of a training routine: derives extension intervals from the
# dataset sizes, logs them, and registers the standard extensions.
# Number of iterations per epoch (at least 1).
n_iteration = max(len(train) // config.batchsize, 1)
# Interval in iterations derived from the train/test size ratio (at least 1).
test_interval = (max(len(train) // len(test), 1), 'iteration')
save_interval = (5, 'epoch')
# Evaluates to once per epoch, expressed in iterations.
log_interval = (max(n_iteration // 1, 1), 'iteration')
progressbar_interval = 3
imgview_face_interval = (5, 'iteration')
imgview_weight_interval = (1, 'epoch')

logger.info('Test interval : {}'.format(test_interval))
logger.info('Save interval : {}'.format(save_interval))
logger.info('Log interval : {}'.format(log_interval))
logger.info('ProgressBar interval : {}'.format(progressbar_interval))
logger.info('ImgView face interval : {}'.format(imgview_face_interval))
logger.info('ImgView weight interval : {}'.format(imgview_weight_interval))

# Extensions
trainer.extend(extensions.dump_graph('main/loss'), trigger=save_interval)
# Full trainer snapshot and a model-only snapshot, both every save_interval.
trainer.extend(
    extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
    trigger=save_interval)
trainer.extend(extensions.snapshot_object(model,
                                          'model_epoch_{.updater.epoch}'),
               trigger=save_interval)
trainer.extend(extensions.LogReport(trigger=log_interval))
trainer.extend(extensions.PrintReport(
    ['epoch', 'iteration', 'main/loss', 'validation/main/loss']),
    trigger=log_interval)
trainer.extend(
    extensions.ProgressBar(update_interval=progressbar_interval))

# My extensions
# Sequential Evaluator
model.rcnn_train = True # optimizer = optimizers.Adam() # optimizer.setup(model) optimizer = optimizers.MomentumSGD(lr=0.001) optimizer.setup(model) optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005)) updater = training.StandardUpdater(train_iter, optimizer, device=0) trainer = training.Trainer(updater, (100, 'epoch'), out='train_rcnn') trainer.extend(extensions.LogReport(trigger=(100, 'iteration'))) trainer.extend(extensions.PrintReport([ 'epoch', 'iteration', 'main/loss_cls', 'main/cls_accuracy', 'main/loss_bbox', 'main/loss_rcnn', 'elapsed_time', ]), trigger=(100, 'iteration')) trainer.extend( extensions.snapshot_object(model, 'snapshot_{.updater.iteration}'), trigger=(1000, 'iteration')) trainer.extend(extensions.PlotReport(['main/loss_rcnn'], trigger=(100, 'iteration'))) trainer.extend(extensions.PlotReport(['main/cls_accuracy'], trigger=(100, 'iteration'))) trainer.extend( extensions.dump_graph('main/loss_rcnn', out_name='loss_rcnn.dot')) trainer.run()
def main(arg_list=None):
    """Train a frame classifier (or an RPL layer) in one or more stages.

    Args:
        arg_list: optional sequence of command-line arguments; each item is
            converted with ``str`` before parsing.  When ``None`` the
            arguments are taken from ``sys.argv``.

    The per-stage options (``--epoch``, ``--optimizer``, ``--batch-size``,
    ``--lr``, ``--early-stopping``) accept several values; stage ``i`` uses
    the ``i``-th value of each, as resolved by ``index_padded``.  Every
    stage writes to ``<out>/<i>`` and the final model is saved to
    ``<out>/model`` (and to the fold model file when ``--train-fold`` is
    given).

    The function prints a message and returns early when the activation
    name is unknown or when required fold data is missing.  An unknown
    optimizer name terminates the process with exit status 1.
    """
    parser = argparse.ArgumentParser(description='Chainer LSTM')
    parser.add_argument('--epoch', '-e', type=int, nargs='+', default=[20],
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--optimizer', '-o', nargs='+',
                        default=['momentumsgd'],
                        help='Optimizer (sgd, momentumsgd, adam)')
    parser.add_argument('--batch-size', '-b', type=int, nargs='+',
                        default=[128],
                        help='Number of training points in each mini-batch')
    parser.add_argument('--lr', type=float, nargs='+',
                        default=[1e-2, 1e-3, 1e-4, 1e-5],
                        help='Learning rate')
    parser.add_argument('--early-stopping', type=str2bool, nargs='+',
                        default=[True],
                        help="True if early stopping should be enabled")
    parser.add_argument(
        '--network', '-n', default='ff',
        help='Neural network type, either "ff", "tdnn", "lstm", '
             '"zoneoutlstm", "peepholelstm" or "gru". Setting any recurrent '
             'network implies "--shuffle-sequences"')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--units', '-u', type=int, nargs='+', default=[1024],
                        help='Number of units')
    parser.add_argument('--layers', '-l', type=int, default=2,
                        help='Number of hidden layers')
    parser.add_argument('--activation', '-a', default='relu',
                        help='FF activation function (sigmoid, tanh or relu)')
    parser.add_argument('--tdnn-ksize', type=int, nargs='+', default=[5],
                        help='TDNN kernel size')
    parser.add_argument('--bproplen', type=int, default=20,
                        help='Backpropagation length')
    parser.add_argument('--timedelay', type=int, default=0,
                        help='Delay target values by this many time steps')
    parser.add_argument('--noplot', dest='plot', action='store_false',
                        help='Disable PlotReport extension')
    parser.add_argument('--splice', type=int, default=0,
                        help='Splicing size')
    parser.add_argument(
        '--dropout', '-d', type=float, nargs='+', default=[0],
        help='Dropout rate (0 to disable). In case of Zoneout LSTM, this '
             'parameter has 2 arguments: c_ratio h_ratio')
    parser.add_argument('--ft', default='final.feature_transform',
                        help='Kaldi feature transform file')
    parser.add_argument('--tri', action='store_true', help='Use triphones')
    parser.add_argument(
        '--shuffle-sequences', action='store_true',
        help='True if sequences should be shuffled as a whole, otherwise all '
             'frames will be shuffled independent of each other')
    parser.add_argument(
        '--data-dir', default='data/fmllr',
        help='Data directory, this will be prepended to data files and '
             'feature transform')
    parser.add_argument(
        '--offset-dir', default='data',
        help='Data directory, this will be prepended to offset files')
    parser.add_argument(
        '--target-dir', default='data/targets',
        help='Data directory, this will be prepended to target files')
    parser.add_argument(
        '--ivector-dir',
        help='Data directory, this will be prepended to ivector files')
    parser.add_argument('--data', default='data_{}.npy', help='Training data')
    parser.add_argument('--offsets', default='offsets_{}.npy',
                        help='Training offsets')
    parser.add_argument('--targets', default='targets_{}.npy',
                        help='Training targets')
    parser.add_argument('--ivectors', default='ivectors_{}.npy',
                        help='Training ivectors')
    parser.add_argument('--no-validation', dest='use_validation',
                        action='store_false',
                        help='Do not evaluate validation data while training')
    parser.add_argument('--train-fold', type=int,
                        help='Train fold network with this ID')
    parser.add_argument('--train-rpl', action='store_true',
                        help='Train RPL layer')
    parser.add_argument('--rpl-model', default="result_rpl/model",
                        help='RPL layer model')
    parser.add_argument('--fold-data-dir', default="fold_data",
                        help='Directory with fold input data')
    parser.add_argument('--fold-output-dir', default="fold_data_out",
                        help='Directory with predicted fold output')
    parser.add_argument('--fold-model-dir', default="fold_models",
                        help='Directory with output fold model')
    parser.add_argument(
        '--fold-data-pattern', default='data_{0}.npy',
        help='Filename pattern of each fold data, {0} will be replaced by '
             'fold ID')
    parser.add_argument('--fold-offset-pattern', default='offsets_{0}.npy',
                        help='Filename pattern of each fold offset')
    parser.add_argument('--fold-target-pattern', default='targets_{0}.npy',
                        help='Filename pattern of each fold targets')
    parser.add_argument(
        '--fold-ivector-pattern', default='ivectors_{}.npy',
        help='Filename pattern of each fold i-vectors file, {} will be '
             'replaced by fold ID')
    parser.add_argument('--fold-output-pattern', default='data_{0}.npy',
                        help='Filename pattern of each fold network output')
    parser.add_argument('--fold-network-pattern', default='fold_{0}.npz',
                        help='Filename pattern of each fold network')
    # NOTE(review): --no-progress is parsed but never consulted; the
    # progress bar extension is disabled unconditionally below.
    parser.add_argument('--no-progress', action='store_true',
                        help='Disable progress bar')

    if arg_list is not None:
        args = parser.parse_args(list(map(str, arg_list)))
    else:
        args = parser.parse_args()

    # set options implied by other options
    if is_nn_recurrent(args.network):
        args.shuffle_sequences = True

    # create output directories
    Path(args.out).mkdir(exist_ok=True, parents=True)
    if args.train_fold is not None:
        file_out = Path(args.fold_model_dir,
                        args.fold_network_pattern.format(args.train_fold))
        Path(file_out.parent).mkdir(exist_ok=True, parents=True)

    # public attributes of the namespace, i.e. the parsed options
    arg_names = [attr for attr in dir(args) if not attr.startswith('_')]

    # print arguments to the file, followed by the quoted command line
    with open(args.out + "/args.txt", "w") as f:
        for attr in arg_names:
            f.write('# {}: {}\n'.format(attr, getattr(args, attr)))
        f.write(' '.join(
            map(lambda x: "'" + x + "'" if ' ' in x else x, sys.argv)) + '\n')

    # print arguments to stdout
    for attr in arg_names:
        print('# {}: {}'.format(attr, getattr(args, attr)))
    print('')

    # number of output classes
    num_classes = 1909 if args.tri else 39

    # create model
    if args.train_rpl:
        model = RPL4(num_classes)
        model_cls = L.Classifier(model)
    else:
        activations = {
            "sigmoid": F.sigmoid,
            "tanh": F.tanh,
            "relu": F.relu,
        }
        if args.activation not in activations:
            print("Wrong activation function specified")
            return
        activation = activations[args.activation]
        model = get_nn(args.network, args.layers, args.units, num_classes,
                       activation, args.tdnn_ksize, args.dropout)

        # classifier reports softmax cross entropy loss and accuracy at every
        # iteration, which will be used by the PrintReport extension below.
        model_cls = L.Classifier(model)
    if args.gpu >= 0:
        # make a specified GPU current
        chainer.cuda.get_device_from_id(args.gpu).use()
        model_cls.to_gpu()  # copy the model to the GPU

    offsets = offsets_dev = None
    # Bound up front so the checks after data loading are valid on every
    # path, including RPL training which never loads i-vectors.
    ivectors = None
    ivectors_dev = None

    if args.train_rpl:
        # load training data: consecutive folds until a file is missing
        fold = 0
        x = []
        y = []

        while True:
            x_file = Path(args.fold_output_dir,
                          args.fold_output_pattern.format(fold))
            y_file = Path(args.fold_data_dir,
                          args.fold_target_pattern.format(fold))
            if not x_file.is_file() or not y_file.is_file():
                break
            print("Loading fold {} data".format(fold))
            x_ = np.load(str(x_file))
            y_ = np.load(str(y_file))
            x.append(x_)
            y.append(y_)
            fold += 1

        if fold == 0:
            print("Error: No fold data found")
            return

        x = np.concatenate(x, axis=0)
        y = np.concatenate(y, axis=0)

        if args.use_validation:
            # TODO: use args.data instead of args.dev_data
            x_dev = np.load(str(Path(args.data_dir, args.data.format("dev"))))
            y_dev = np.load(
                str(Path(args.target_dir, args.targets.format("dev"))))
    else:
        # load training data
        if args.train_fold is not None:
            x = []
            offsets = [0]
            y = []
            if args.ivector_dir is not None:
                ivectors = []
            num = 0
            fold = 0
            while True:
                # the fold being trained is held out of its own training set
                if fold != args.train_fold:
                    x_file = Path(args.fold_data_dir,
                                  args.fold_data_pattern.format(fold))
                    if not x_file.is_file():
                        break
                    offsets_file = Path(
                        args.fold_data_dir,
                        args.fold_offset_pattern.format(fold))
                    y_file = Path(args.fold_data_dir,
                                  args.fold_target_pattern.format(fold))
                    if args.ivector_dir is not None:
                        ivectors_file = Path(
                            args.fold_data_dir,
                            args.fold_ivector_pattern.format(fold))
                        if not ivectors_file.is_file():
                            print("Error: missing ivectors for fold data {}".
                                  format(fold))
                            return

                    print("Loading fold {} data".format(fold))
                    x_fold = np.load(str(x_file))
                    x.append(x_fold)
                    if is_nn_recurrent(args.network):
                        # shift this fold's offsets past the frames
                        # already loaded
                        offsets_fold = np.load(str(offsets_file))
                        offsets.extend(offsets_fold[1:] + num)
                    y_fold = np.load(str(y_file))
                    y.append(y_fold)
                    if args.ivector_dir is not None:
                        ivectors_fold = np.load(str(ivectors_file))
                        ivectors.append(ivectors_fold)
                    num += x_fold.shape[0]
                fold += 1

            if len(x) == 0:
                print("Error: No fold data found")
                return

            x = np.concatenate(x, axis=0)
            if is_nn_recurrent(args.network):
                offsets = np.array(offsets, dtype=np.int32)
            y = np.concatenate(y, axis=0)
            if args.ivector_dir is not None:
                ivectors = np.concatenate(ivectors, axis=0)
        else:
            x = np.load(str(Path(args.data_dir, args.data.format("train"))))
            if is_nn_recurrent(args.network):
                offsets = np.load(
                    str(Path(args.offset_dir, args.offsets.format("train"))))
            y = np.load(
                str(Path(args.target_dir, args.targets.format("train"))))
            if args.ivector_dir is not None:
                ivectors = np.load(
                    str(Path(args.ivector_dir,
                             args.ivectors.format("train"))))

        if args.use_validation:
            x_dev = np.load(str(Path(args.data_dir, args.data.format("dev"))))
            if is_nn_recurrent(args.network):
                offsets_dev = np.load(
                    str(Path(args.offset_dir, args.offsets.format("dev"))))
            y_dev = np.load(
                str(Path(args.target_dir, args.targets.format("dev"))))
            if args.ivector_dir is not None:
                ivectors_dev = np.load(
                    str(Path(args.ivector_dir, args.ivectors.format("dev"))))

    # apply splicing
    if args.network == "tdnn":
        splice = (sum(args.tdnn_ksize) - len(args.tdnn_ksize)) // 2
    else:
        splice = args.splice
    if splice > 0:
        x = splicing(x, range(-splice, splice + 1))
        # x_dev only exists when validation is enabled
        if args.use_validation:
            x_dev = splicing(x_dev, range(-splice, splice + 1))

    # load feature transform
    if args.ft is not None and args.ft != '-':
        ft = loadKaldiFeatureTransform(str(Path(args.data_dir, args.ft)))
        if is_nn_recurrent(args.network):
            # select transform middle frame if the network is recurrent
            dim = ft["shape"][1]
            zi = ft["shifts"].index(0)
            ft["rescale"] = ft["rescale"][zi * dim:(zi + 1) * dim]
            ft["addShift"] = ft["addShift"][zi * dim:(zi + 1) * dim]
            ft["shape"][0] = dim
            ft["shifts"] = [0]
        elif args.network == "tdnn":
            # repeat the middle-frame transform across the spliced window
            dim = ft["shape"][1]
            zi = ft["shifts"].index(0)
            winlen = 2 * splice + 1
            ft["rescale"] = np.tile(ft["rescale"][zi * dim:(zi + 1) * dim],
                                    winlen)
            ft["addShift"] = np.tile(
                ft["addShift"][zi * dim:(zi + 1) * dim], winlen)
            ft["shape"][0] = dim * winlen
            ft["shifts"] = list(range(-splice, splice + 1))
        # apply feature transform
        x = applyKaldiFeatureTransform(x, ft)
        if args.use_validation:
            x_dev = applyKaldiFeatureTransform(x_dev, ft)

    # append i-vectors to the features along axis 1
    if ivectors is not None:
        x = np.concatenate((x, ivectors), axis=1)
    if ivectors_dev is not None:
        x_dev = np.concatenate((x_dev, ivectors_dev), axis=1)

    # shift the input dataset according to time delay
    if is_nn_recurrent(args.network) and args.timedelay != 0:
        x, y, offsets = apply_time_delay(x, y, offsets, args.timedelay)
        if args.use_validation:
            x_dev, y_dev, offsets_dev = apply_time_delay(
                x_dev, y_dev, offsets_dev, args.timedelay)

    # create chainer datasets
    train_dataset = chainer.datasets.TupleDataset(x, y)
    if args.use_validation:
        dev_dataset = chainer.datasets.TupleDataset(x_dev, y_dev)

    # prepare train stages: one stage per entry of the longest option list
    train_stages_len = max(
        len(a) for a in [
            args.epoch, args.optimizer, args.batch_size, args.lr,
            args.early_stopping
        ])
    train_stages = [{
        'epoch': index_padded(args.epoch, i),
        'opt': index_padded(args.optimizer, i),
        'bs': index_padded(args.batch_size, i),
        'lr': index_padded(args.lr, i),
        'es': index_padded(args.early_stopping, i)
    } for i in range(train_stages_len)]

    for i, ts in enumerate(train_stages):
        if ts['opt'] == 'adam':
            # learning rate not used, don't print it
            print(
                "=== Training stage {}: epoch = {}, batch size = {}, "
                "optimizer = {}, early stopping = {}"
                .format(i, ts['epoch'], ts['bs'], ts['opt'], ts['es']))
        else:
            print(
                "=== Training stage {}: epoch = {}, batch size = {}, "
                "optimizer = {}, learning rate = {}, early stopping = {}"
                .format(i, ts['epoch'], ts['bs'], ts['opt'], ts['lr'],
                        ts['es']))

        # reset state to allow training with different batch size in each
        # stage
        if not args.train_rpl and is_nn_recurrent(args.network):
            model.reset_state()

        # setup an optimizer
        if ts['opt'] == "sgd":
            optimizer = chainer.optimizers.SGD(lr=ts['lr'])
        elif ts['opt'] == "momentumsgd":
            optimizer = chainer.optimizers.MomentumSGD(lr=ts['lr'])
        elif ts['opt'] == "adam":
            optimizer = chainer.optimizers.Adam()
        else:
            print("Wrong optimizer specified: {}".format(ts['opt']))
            # sys.exit rather than the interactive-only exit() builtin
            sys.exit(1)
        optimizer.setup(model_cls)

        if args.shuffle_sequences:
            train_iter = SequenceShuffleIterator(train_dataset, offsets,
                                                 ts['bs'])
            if args.use_validation:
                dev_iter = SequenceShuffleIterator(dev_dataset, None,
                                                   ts['bs'], repeat=False,
                                                   shuffle=False)
        else:
            train_iter = SerialIterator(train_dataset, ts['bs'])
            if args.use_validation:
                dev_iter = SerialIterator(dev_dataset, ts['bs'],
                                          repeat=False, shuffle=False)

        # set up a trainer
        if is_nn_recurrent(args.network):
            updater = BPTTUpdater(train_iter, optimizer, args.bproplen,
                                  device=args.gpu)
        else:
            updater = StandardUpdater(train_iter, optimizer, device=args.gpu)
        if ts['es'] and args.use_validation:
            stop_trigger = EarlyStoppingTrigger(ts['epoch'],
                                                key='validation/main/loss',
                                                eps=-0.001)
        else:
            stop_trigger = (ts['epoch'], 'epoch')
        trainer = training.Trainer(updater, stop_trigger,
                                   out="{}/{}".format(args.out, i))

        # NOTE(review): the BestModelSaver key is a validation metric, so
        # this branch presumably needs validation enabled - confirm.
        if ts['es']:
            trainer.extend(model_saver)
        else:
            trainer.extend(BestModelSaver(key="validation/main/loss"))

        # evaluate the model with the development dataset for each epoch
        if args.use_validation:
            trainer.extend(
                extensions.Evaluator(dev_iter, model_cls, device=args.gpu))

        # dump a computational graph from 'loss' variable at the first
        # iteration; "main" refers to the target link of the "main" optimizer.
        trainer.extend(extensions.dump_graph('main/loss'))

        # take a snapshot for each specified epoch
        frequency = ts['epoch'] if args.frequency == -1 else max(
            1, args.frequency)
        trainer.extend(extensions.snapshot(), trigger=(frequency, 'epoch'))

        # write a log of evaluation statistics for each epoch
        trainer.extend(extensions.LogReport())

        # save two plot images to the result dir
        if args.plot and extensions.PlotReport.available():
            plot_vars_loss = ['main/loss']
            plot_vars_acc = ['main/accuracy']
            if args.use_validation:
                plot_vars_loss.append('validation/main/loss')
                plot_vars_acc.append('validation/main/accuracy')
            trainer.extend(
                extensions.PlotReport(plot_vars_loss, 'epoch',
                                      file_name='loss.png'))
            trainer.extend(
                extensions.PlotReport(plot_vars_acc, 'epoch',
                                      file_name='accuracy.png'))

        # print selected entries of the log to stdout
        # here "main" refers to the target link of the "main" optimizer again,
        # and "validation" refers to the default name of the Evaluator
        # extension. entries other than 'epoch' are reported by the
        # Classifier link, called by either the updater or the evaluator.
        if args.use_validation:
            print_report_vars = [
                'epoch', 'main/loss', 'validation/main/loss',
                'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
            ]
        else:
            print_report_vars = [
                'epoch', 'main/loss', 'main/accuracy', 'elapsed_time'
            ]
        trainer.extend(extensions.PrintReport(print_report_vars))

        if args.resume:
            # Resume from a snapshot
            chainer.serializers.load_npz(args.resume, trainer)

        # Run the training
        trainer.run()

        if ts['es']:
            # load the last model if the max epoch was not reached (that
            # means early stopping trigger stopped training because the
            # validation loss increased)
            if updater.epoch_detail < ts['epoch']:
                chainer.serializers.load_npz(
                    "{}/{}/model_tmp".format(args.out, i), model_cls)
            # remove temporary model from this training stage
            os.remove("{}/{}/model_tmp".format(args.out, i))
        else:
            # load the best model from this training stage
            chainer.serializers.load_npz(
                "{}/{}/model_best".format(args.out, i), model_cls)
            # remove the best model from this training stage
            os.remove("{}/{}/model_best".format(args.out, i))

    # save the final model
    chainer.serializers.save_npz("{}/model".format(args.out), model_cls)
    if args.train_fold is not None:
        chainer.serializers.save_npz(
            str(
                Path(args.fold_model_dir,
                     args.fold_network_pattern.format(args.train_fold))),
            model_cls)
def main():
    """Train a VAE on MNIST, then save original/reconstructed/sampled images.

    Command-line options select the device, output directory, number of
    epochs, latent dimension and mini-batch size.  After training, 3x3 image
    grids are written into the output directory.
    """
    cli = argparse.ArgumentParser(description='Chainer example: VAE')
    cli.add_argument('--initmodel', '-m', default='',
                     help='Initialize the model from given file')
    cli.add_argument('--resume', '-r', default='',
                     help='Resume the optimization from snapshot')
    cli.add_argument('--gpu', '-g', default=-1, type=int,
                     help='GPU ID (negative value indicates CPU)')
    cli.add_argument('--out', '-o', default='result',
                     help='Directory to output the result')
    cli.add_argument('--epoch', '-e', default=100, type=int,
                     help='number of epochs to learn')
    cli.add_argument('--dimz', '-z', default=20, type=int,
                     help='dimention of encoded vector')
    cli.add_argument('--batchsize', '-b', type=int, default=100,
                     help='learning minibatch size')
    cli.add_argument('--test', action='store_true',
                     help='Use tiny datasets for quick tests')
    opts = cli.parse_args()

    print('GPU: {}'.format(opts.gpu))
    print('# dim z: {}'.format(opts.dimz))
    print('# Minibatch-size: {}'.format(opts.batchsize))
    print('# epoch: {}'.format(opts.epoch))
    print('')

    # The VAE model itself is defined in net.py.
    model = net.VAE(784, opts.dimz, 500)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Optionally start from previously saved weights.
    if opts.initmodel:
        chainer.serializers.load_npz(opts.initmodel, model)

    # MNIST without labels; --test keeps only the first 100 examples of each.
    train, test = chainer.datasets.get_mnist(withlabel=False)
    if opts.test:
        train, _ = chainer.datasets.split_dataset(train, 100)
        test, _ = chainer.datasets.split_dataset(test, 100)

    train_iter = chainer.iterators.SerialIterator(train, opts.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, opts.batchsize, repeat=False, shuffle=False)

    # The loss used for training is passed explicitly through 'loss_func'.
    train_loss = model.get_loss_func()
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=opts.gpu, loss_func=train_loss)

    stop_at = (opts.epoch, 'epoch')
    trainer = training.Trainer(updater, stop_at, out=opts.out)

    evaluator = extensions.Evaluator(
        test_iter, model, device=opts.gpu,
        eval_func=model.get_loss_func(k=10))
    trainer.extend(evaluator)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(opts.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    report_keys = ['epoch', 'main/loss', 'validation/main/loss',
                   'main/rec_loss', 'validation/main/rec_loss',
                   'elapsed_time']
    trainer.extend(extensions.PrintReport(report_keys))
    trainer.extend(extensions.ProgressBar())

    if opts.resume:
        chainer.serializers.load_npz(opts.resume, trainer)

    trainer.run()

    def save_images(x, filename):
        """Draw the first nine rows of ``x`` as 28x28 images on a 3x3 grid."""
        import matplotlib.pyplot as plt
        figure, axes = plt.subplots(3, 3, figsize=(9, 9), dpi=100)
        for axis, row in zip(axes.flatten(), x):
            axis.imshow(row.reshape(28, 28))
        figure.savefig(filename)

    def save_reconstruction(dataset, indices, stem):
        """Save the selected samples and their reconstructions by the model."""
        originals = chainer.Variable(np.asarray(dataset[indices]))
        with chainer.using_config('train', False), \
                chainer.no_backprop_mode():
            rebuilt = model(originals)
        save_images(originals.array, os.path.join(opts.out, stem))
        save_images(rebuilt.array,
                    os.path.join(opts.out, stem + '_reconstructed'))

    # Visualisation is done on the CPU copy of the model.
    model.to_cpu()
    save_reconstruction(train, [1, 3, 5, 10, 2, 0, 13, 15, 17], 'train')
    save_reconstruction(test, [3, 2, 1, 18, 4, 8, 11, 17, 61], 'test')

    # Decode images from latent vectors drawn from a standard normal.
    latent = np.random.normal(0, 1, (9, opts.dimz)).astype(np.float32)
    sampled = model.decode(chainer.Variable(latent))
    save_images(sampled.array, os.path.join(opts.out, 'sampled'))
def main():
    """Train a ResNet50V1-based classifier on the flowers dataset.

    Parses the command line, fetches and pre-processes the data, trains for
    a fixed number of epochs, saves the final model as ``model.npz`` in the
    run's output directory and uploads the results.  Returns early (after
    printing a message) if the images or labels cannot be fetched.
    """
    parser = argparse.ArgumentParser(description="Learning from flowers data")
    parser.add_argument("--gpu", "-g", type=int, default=-1,
                        help="GPU ID (negative value indicates CPU")
    parser.add_argument("--init",
                        help="Initialize the model from given file")
    parser.add_argument('--job', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument("--resume", '-r', default='',
                        help="Initialize the trainer from given file")
    args = parser.parse_args()

    # Fixed training hyperparameters.
    batch = 32
    epoch = 50
    val_batch = 200

    model = models.ResNet50V1(data.ClassNumber)
    if args.init:
        # The option is named --init; the original read the non-existent
        # ``args.initmodel`` here and crashed with AttributeError.
        print('Load model from', args.init)
        chainer.serializers.load_npz(args.init, model)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    if data.fetch_flowers() and data.fetch_labels():
        print("Flower images and labels have been fetched.")
    else:
        print("Failed to fetch flower images and labels")
        return

    data.pre_process_data(224)

    output_name = output.init_train(model.__class__)
    output_path = path.join(output.OutPath, output_name)

    train, validate = data.get_datasets()

    train_iter = chainer.iterators.MultiprocessIterator(
        train, batch, n_processes=args.job)
    val_iter = chainer.iterators.MultiprocessIterator(
        validate, val_batch, repeat=False, n_processes=args.job)

    classifier = chainer.links.Classifier(model)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(classifier)
    # Exclude the ``base`` sub-link from parameter updates.
    model.base.disable_update()

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (epoch, 'epoch'), output_path)

    val_interval = 500, 'iteration'
    log_interval = 250, 'iteration'
    snapshot_interval = 5000, 'iteration'

    trainer.extend(extensions.Evaluator(val_iter, classifier,
                                        device=args.gpu),
                   trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    print("Start training")
    trainer.run()

    # Save the trained weights from the CPU copy of the model.
    model.to_cpu()
    chainer.serializers.save_npz(path.join(output_path, "model.npz"), model)

    print("Uploading files")
    output.upload_result(output_name)
    print("Finish training")
loss_config=config.loss, predictor=predictor, discriminator=discriminator, device=config.train.gpu, iterator=train_iter, optimizer=opts, converter=converter, ) # trainer trigger_log = (config.train.log_iteration, 'iteration') trigger_snapshot = (config.train.snapshot_iteration, 'iteration') trainer = training.Trainer(updater, out=arguments.output) ext = extensions.Evaluator(test_iter, models, converter, device=config.train.gpu, eval_func=updater.forward) trainer.extend(ext, name='test', trigger=trigger_log) ext = extensions.Evaluator(train_eval_iter, models, converter, device=config.train.gpu, eval_func=updater.forward) trainer.extend(ext, name='train', trigger=trigger_log) trainer.extend(extensions.dump_graph('predictor/loss')) ext = extensions.snapshot_object(predictor, filename='predictor_{.updater.iteration}.npz') trainer.extend(ext, trigger=trigger_snapshot) trainer.extend(extensions.LogReport(trigger=trigger_log)) trainer.extend(extensions.PrintReport(['predictor/loss'])) save_args(arguments, arguments.output) trainer.run()
def train_model(self, datasets):
    """Train ``self.model`` on ``datasets`` and return the model.

    Hyperparameters are read from the command line.  ``datasets`` is split
    into a training and a test part with ``split_dataset(datasets, 80)``;
    the test part is evaluated once per epoch.  When ``--early-stopping`` is
    given, training may stop before ``--epoch`` is reached.

    Args:
        datasets: Dataset passed to ``split_dataset``.

    Returns:
        ``self.model`` after training.
    """
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=10,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--early-stopping', type=str,
                        help='Metric to watch for early stopping')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))

    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        self.model.to_gpu()

    # --learnrate is passed as Adam's first positional argument (alpha).
    optimizer = chainer.optimizers.Adam(args.learnrate)
    optimizer.setup(self.model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(5e-4))

    train, test = split_dataset(datasets, 80)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    stop_trigger = (args.epoch, 'epoch')
    # Early stopping option
    if args.early_stopping:
        stop_trigger = triggers.EarlyStoppingTrigger(
            monitor=args.early_stopping, verbose=True,
            max_trigger=(args.epoch, 'epoch'))

    # Set up a trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu,
        loss_func=mean_squared_error)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(
        extensions.Evaluator(test_iter, self.model, device=args.gpu))

    # Halve the step size every 25 epochs.  Adam's tunable hyperparameter
    # is 'alpha'; its 'lr' attribute is a read-only property, so shifting
    # 'lr' (as the original did) fails when the extension initialises.
    trainer.extend(extensions.ExponentialShift('alpha', 0.5),
                   trigger=(25, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch.  The file-name prefix (including its
    # spelling) is left unchanged so previously written snapshots still
    # match.
    trainer.extend(
        extensions.snapshot(filename='snaphot_epoch_{.updater.epoch}'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Show the first training example before starting.
    print(train[:1])

    # Run the training
    trainer.run()

    return self.model
def main():
    """Train an MLP classifier on MNIST on the device chosen on the CLI.

    The device is resolved by ``parse_device`` from ``--device`` (or the
    deprecated ``--gpu``).  Snapshots, logs and optional plots are written
    to the ``--out`` directory.
    """
    cli = argparse.ArgumentParser(description='Chainer example: MNIST')
    cli.add_argument('--batchsize', '-b', type=int, default=100,
                     help='Number of images in each mini-batch')
    cli.add_argument('--epoch', '-e', type=int, default=20,
                     help='Number of sweeps over the dataset to train')
    cli.add_argument('--frequency', '-f', type=int, default=-1,
                     help='Frequency of taking a snapshot')
    cli.add_argument('--device', '-d', type=str, default='-1',
                     help='Device specifier. Either ChainerX device '
                     'specifier or an integer. If non-negative integer, '
                     'CuPy arrays with specified device id are used. If '
                     'negative integer, NumPy arrays are used')
    cli.add_argument('--out', '-o', default='result',
                     help='Directory to output the result')
    cli.add_argument('--resume', '-r', default='',
                     help='Resume the training from snapshot')
    cli.add_argument('--unit', '-u', type=int, default=1000,
                     help='Number of units')
    cli.add_argument('--noplot', dest='plot', action='store_false',
                     help='Disable PlotReport extension')
    deprecated = cli.add_argument_group('deprecated arguments')
    deprecated.add_argument('--gpu', '-g', type=int, nargs='?', const=0,
                            help='GPU ID (negative value indicates CPU)')
    opts = cli.parse_args()

    device = parse_device(opts)

    print('Device: {}'.format(device))
    print('# unit: {}'.format(opts.unit))
    print('# Minibatch-size: {}'.format(opts.batchsize))
    print('# epoch: {}'.format(opts.epoch))
    print('')

    # The Classifier wrapper reports the loss and accuracy entries that the
    # PrintReport extension below displays.
    model = L.Classifier(MLP(opts.unit, 10))
    model.to_device(device)
    device.use()

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist()
    train_iter = chainer.iterators.SerialIterator(train, opts.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, opts.batchsize, repeat=False, shuffle=False)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=device)
    stop_at = (opts.epoch, 'epoch')
    trainer = training.Trainer(updater, stop_at, out=opts.out)

    # Evaluate on the test set (default trigger).
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))

    # Graph dumping is skipped when the device's array module is chainerx.
    # TODO(niboshi): Temporarily disabled for chainerx. Fix it.
    if device.xp is not chainerx:
        trainer.extend(extensions.dump_graph('main/loss'))

    # -1 means "snapshot only once, at the final epoch"; any other value is
    # clamped to at least one epoch.
    if opts.frequency == -1:
        snapshot_every = opts.epoch
    else:
        snapshot_every = max(1, opts.frequency)
    trainer.extend(extensions.snapshot(),
                   trigger=(snapshot_every, 'epoch'))

    trainer.extend(extensions.LogReport())

    # Loss and accuracy curves, if plotting is enabled and available.
    if opts.plot and extensions.PlotReport.available():
        plot_specs = (
            (['main/loss', 'validation/main/loss'], 'loss.png'),
            (['main/accuracy', 'validation/main/accuracy'],
             'accuracy.png'),
        )
        for keys, image_name in plot_specs:
            trainer.extend(
                extensions.PlotReport(keys, 'epoch', file_name=image_name))

    # "main" is the target link of the "main" optimizer and "validation" is
    # the default name of the Evaluator extension.
    columns = ['epoch', 'main/loss', 'validation/main/loss',
               'main/accuracy', 'validation/main/accuracy', 'elapsed_time']
    trainer.extend(extensions.PrintReport(columns))

    trainer.extend(extensions.ProgressBar())

    if opts.resume:
        # Restore the trainer state from a snapshot.
        chainer.serializers.load_npz(opts.resume, trainer)

    trainer.run()
def main():
    """Train the convnet selected by ``--arch`` from image-label list files.

    Training and validation images are loaded by multiprocess iterators.
    Evaluation, snapshots and logging run on iteration-based intervals;
    ``--test`` shortens those intervals to a single iteration.
    """
    architectures = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
        'resnext50': resnet50.ResNeXt50,
    }

    cli = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    cli.add_argument('train', help='Path to training image-label list file')
    cli.add_argument('val', help='Path to validation image-label list file')
    cli.add_argument('--arch', '-a', choices=architectures.keys(),
                     default='nin', help='Convnet architecture')
    cli.add_argument('--batchsize', '-B', type=int, default=32,
                     help='Learning minibatch size')
    cli.add_argument('--epoch', '-E', type=int, default=10,
                     help='Number of epochs to train')
    cli.add_argument('--gpu', '-g', type=int, default=-1,
                     help='GPU ID (negative value indicates CPU')
    cli.add_argument('--initmodel',
                     help='Initialize the model from given file')
    cli.add_argument('--loaderjob', '-j', type=int,
                     help='Number of parallel data loading processes')
    cli.add_argument('--mean', '-m', default='mean.npy',
                     help='Mean file (computed by compute_mean.py)')
    cli.add_argument('--resume', '-r', default='',
                     help='Initialize the trainer from given file')
    cli.add_argument('--out', '-o', default='result',
                     help='Output directory')
    cli.add_argument('--root', '-R', default='.',
                     help='Root directory path of image files')
    cli.add_argument('--val_batchsize', '-b', type=int, default=250,
                     help='Validation minibatch size')
    cli.add_argument('--test', action='store_true')
    cli.set_defaults(test=False)
    opts = cli.parse_args()

    # Build the requested network and optionally load saved weights.
    model_class = architectures[opts.arch]
    model = model_class()
    if opts.initmodel:
        print('Load model from', opts.initmodel)
        chainer.serializers.load_npz(opts.initmodel, model)
    if opts.gpu >= 0:
        # Make the GPU current before copying the model onto it.
        chainer.cuda.get_device_from_id(opts.gpu).use()
        model.to_gpu()

    # Datasets share the mean image loaded from the --mean file.
    mean = np.load(opts.mean)
    train = PreprocessedDataset(opts.train, opts.root, mean, model.insize)
    val = PreprocessedDataset(opts.val, opts.root, mean, model.insize,
                              False)

    # Images are loaded by subprocesses running in parallel with the
    # training/validation.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, opts.batchsize, n_processes=opts.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, opts.val_batchsize, repeat=False, n_processes=opts.loaderjob)

    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=opts.gpu)
    trainer = training.Trainer(updater, (opts.epoch, 'epoch'), opts.out)

    # --test runs every extension at each iteration for a quick check.
    if opts.test:
        val_every, log_every = 1, 1
    else:
        val_every, log_every = 100000, 1000
    val_interval = (val_every, 'iteration')
    log_interval = (log_every, 'iteration')

    evaluator = extensions.Evaluator(val_iter, model, device=opts.gpu)
    trainer.extend(evaluator, trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    model_snapshot = extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}')
    trainer.extend(model_snapshot, trigger=val_interval)
    # The interval is passed to LogReport itself: it decides when a log
    # entry is emitted, not when observations are read.
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    columns = [
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]
    trainer.extend(extensions.PrintReport(columns), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if opts.resume:
        chainer.serializers.load_npz(opts.resume, trainer)

    trainer.run()
def main():
    """Train an MLP classifier on a single colour image.

    ``images/blue.jpg`` is read with OpenCV, converted to a channel-first
    float array scaled to [0, 1] and used (with label 5) as both the
    training and the test set.

    Raises:
        IOError: If the image file cannot be read.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=1,
                        help='Number of images in each mini batch')
    parser.add_argument('--epoch', '-e', type=int, default=10,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=10,
                        help='Number of units')
    args = parser.parse_args()

    # Load a colour image.  cv2.imread returns None (it does not raise)
    # when the file is missing or unreadable, so check explicitly.
    image_path = 'images/blue.jpg'
    img = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img is None:
        raise IOError('Could not read image: {}'.format(image_path))

    # (height, width, channel) -> (channel, height, width).  This yields
    # the same element order as concatenating the per-pixel lists of
    # channel 0, channel 1 and channel 2.
    imgdata = np.ascontiguousarray(img.transpose(2, 0, 1), dtype='f')
    imgdata = imgdata.reshape(1, 3, 8, 16)
    imgdata = imgdata / 255.0
    # Parenthesised form works on both Python 2 and Python 3.
    print(imgdata)

    n_in = 3 * 8 * 16

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    model = L.Classifier(MLP(n_in, args.unit, 10))

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Build a one-example dataset used for both training and testing.
    x = imgdata
    y = np.array(5, dtype=np.int32)
    dd = [(x, y)]
    train, test = dd, dd

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot())

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again,
    # and "validation" refers to the default name of the Evaluator
    # extension.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Honour --resume, which was previously parsed but never used.
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    """Train an MLP classifier on KMNIST with data-parallel updates.

    This script is almost identical to train_mnist.py; the difference is
    that gradients are computed on two GPUs through ``ParallelUpdater``.
    The images and labels are read from the ``.npz`` files given on the
    command line.
    """
    cli = argparse.ArgumentParser(description='Chainer example: MNIST')
    cli.add_argument('--batchsize', '-b', type=int, default=400,
                     help='Number of images in each mini-batch')
    cli.add_argument('--epoch', '-e', type=int, default=20,
                     help='Number of sweeps over the dataset to train')
    cli.add_argument('--gpu0', '-g', type=int, default=0,
                     help='First GPU ID')
    cli.add_argument('--gpu1', '-G', type=int, default=1,
                     help='Second GPU ID')
    cli.add_argument('--out', '-o', default='result_parallel',
                     help='Directory to output the result')
    cli.add_argument('--resume', '-r', default='',
                     help='Resume the training from snapshot')
    cli.add_argument('--unit', '-u', type=int, default=1000,
                     help='Number of units')
    cli.add_argument('--train_imgs', default='data/kmnist-train-imgs.npz',
                     help='Path to kmnist training images')
    cli.add_argument('--train_label',
                     default='data/kmnist-train-labels.npz',
                     help='Path to kmnist training labels')
    cli.add_argument('--test_imgs', default='data/kmnist-test-imgs.npz',
                     help='Path to kmnist test images')
    cli.add_argument('--test_label', default='data/kmnist-test-labels.npz',
                     help='Path to kmnist test labels')
    opts = cli.parse_args()

    print('GPU: {}, {}'.format(opts.gpu0, opts.gpu1))
    print('# unit: {}'.format(opts.unit))
    print('# Minibatch-size: {}'.format(opts.batchsize))
    print('# epoch: {}'.format(opts.epoch))
    print('')

    # Make the first GPU current before building the model.
    chainer.backends.cuda.get_device_from_id(opts.gpu0).use()

    model = L.Classifier(train_kmnist.MLP(opts.unit, 10))
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Training split: flatten the images to 784 values, scale by 1/255 and
    # convert the labels to Python ints.
    train_pixels = np.load(opts.train_imgs)['arr_0']
    train_data = train_pixels.reshape((60000, 784)).astype(np.float32)/255.
    train_labels = [int(n) for n in np.load(opts.train_label)['arr_0']]
    train = TupleDataset(train_data, train_labels)

    # Test split, prepared the same way.
    test_pixels = np.load(opts.test_imgs)['arr_0']
    test_data = test_pixels.reshape((10000, 784)).astype(np.float32)/255.
    test_labels = [int(n) for n in np.load(opts.test_label)['arr_0']]
    test = TupleDataset(test_data, test_labels)

    train_iter = chainer.iterators.SerialIterator(train, opts.batchsize)
    test_iter = chainer.iterators.SerialIterator(
        test, opts.batchsize, repeat=False, shuffle=False)

    # ParallelUpdater implements the data-parallel gradient computation on
    # multiple GPUs.  The device named 'main' acts as the master; names
    # other than 'main' are arbitrary.
    gpu_map = {'main': opts.gpu0, 'second': opts.gpu1}
    updater = training.updaters.ParallelUpdater(
        train_iter,
        optimizer,
        devices=gpu_map,
    )
    stop_at = (opts.epoch, 'epoch')
    trainer = training.Trainer(updater, stop_at, out=opts.out)

    trainer.extend(extensions.Evaluator(test_iter, model, device=opts.gpu0))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(opts.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    columns = [
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy', 'elapsed_time'
    ]
    trainer.extend(extensions.PrintReport(columns))
    trainer.extend(extensions.ProgressBar())

    if opts.resume:
        chainer.serializers.load_npz(opts.resume, trainer)

    trainer.run()
def _load_gray_image(image_path, size=32):
    """Read an image as a grayscale float array of shape (1, 1, size, size).

    Pixel values are scaled to [0, 1].

    Raises:
        IOError: If the image file cannot be read.
    """
    img = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure by returning None, not by raising.
        raise IOError('Could not read image: {}'.format(image_path))
    # cv2.imread returns channels in BGR order, so the matching conversion
    # code is BGR2GRAY (RGB2GRAY would swap the red/blue weights).
    gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    return gray.astype('f').reshape(1, 1, size, size) / 255.0


def main():
    """Train a 3-class MLP classifier on three small grayscale images.

    The images ``zero.jpg``, ``black.jpg`` and ``white.jpg`` are labelled
    0, 1 and 2 respectively and used as both the training and the test set.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=1,
                        help='Number of images in each mini batch')
    parser.add_argument('--epoch', '-e', type=int, default=100,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=10,
                        help='Number of units')
    args = parser.parse_args()

    # Load each image as a normalised (1, 1, 32, 32) grayscale array.
    imgdata1 = _load_gray_image('images/zero.jpg')
    imgdata2 = _load_gray_image('images/black.jpg')
    imgdata3 = _load_gray_image('images/white.jpg')

    n_in = 32*32

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    model = L.Classifier(MLP(n_in, args.unit, 3))

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Build the dataset: one (image, label) pair per class.
    dd = [
        (imgdata1, np.array(0, dtype=np.int32)),
        (imgdata2, np.array(1, dtype=np.int32)),
        (imgdata3, np.array(2, dtype=np.int32)),
    ]
    train, test = dd, dd

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer,
                                       device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot at each epoch
    trainer.extend(extensions.snapshot())

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again,
    # and "validation" refers to the default name of the Evaluator
    # extension.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Honour --resume, which was previously parsed but never used.
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def train():
    """Train a DAGMM model on the normal samples of the arrhythmia data.

    Only rows whose label equals 1 are used; the first 90% of them form the
    training set and the remainder the validation set.  Trainer, model and
    optimizer snapshots are written every ``--frequency`` epochs, and
    ``--resume`` restarts from the snapshots of the given epoch number.
    """
    cli = argparse.ArgumentParser(description='DAGMM')
    cli.add_argument('--gpu', '-g', type=int, default=-1,
                     help='GPU ID (negative value indicates CPU)')
    cli.add_argument('--batchsize', '-b', type=int, default=128,
                     help='Number of images in each mini-batch')
    cli.add_argument('--epoch', '-e', type=int, default=10000,
                     help='Number of sweeps over the dataset to train')
    cli.add_argument('--cn_h_unit', type=int, default=10,
                     help='Number of Compression Network hidden units')
    cli.add_argument('--cn_z_unit', type=int, default=2,
                     help='Number of Compression Network z units')
    cli.add_argument('--en_h_unit', type=int, default=10,
                     help='Number of Estimation Network hidden units')
    cli.add_argument('--en_o_unit', type=int, default=2,
                     help='Number of Estimation Network output units')
    cli.add_argument('--out', '-o', default='result',
                     help='Directory to output the result')
    cli.add_argument('--frequency', '-f', type=int, default=20,
                     help='Frequency of taking a snapshot')
    cli.add_argument(
        '--resume', '-r', type=int,
        help='Resume the training from snapshot that is designated epoch number'
    )
    opts = cli.parse_args()

    print('GPU: {}'.format(opts.gpu))
    print('# Minibatch-size: {}'.format(opts.batchsize))
    print('# epoch: {}'.format(opts.epoch))
    print('# Compression Network: Dim - {0} - {1} - {0} - Dim'.format(
        opts.cn_h_unit, opts.cn_z_unit))
    print('# Estimation Network: {} - {} - {}'.format(
        opts.cn_z_unit + 2, opts.en_h_unit, opts.en_o_unit))
    print('# Output-directory: {}'.format(opts.out))
    print('# Frequency-snapshot: {}'.format(opts.frequency))
    if opts.resume:
        print('# Resume-epochNumber: {}'.format(opts.resume))
    print('')

    # Load the dataset.
    features = np.loadtxt('./dataset_arrhythmia/ExplanatoryVariables.csv',
                          delimiter=',')
    labels = np.loadtxt('./dataset_arrhythmia/CriterionVariables.csv',
                        delimiter=',')

    # Keep only the normal samples (label 1).
    healthy = features[labels[:] == 1]

    # Split the normal samples into training and validation parts.
    n_healthy = len(healthy)
    train_part = healthy[:math.floor(n_healthy * 0.9)]
    valid_part = healthy[len(train_part):]

    # Convert both parts to float32.
    train_part = train_part.astype(np.float32)
    valid_part = valid_part.astype(np.float32)

    train_iter = chainer.iterators.SerialIterator(
        train_part, batch_size=opts.batchsize, repeat=True, shuffle=True)
    # The whole validation set is evaluated as a single batch.
    valid_iter = chainer.iterators.SerialIterator(
        valid_part, batch_size=len(valid_part), repeat=False,
        shuffle=False)

    n_features = len(train_part[0])
    model = DAGMM(opts.cn_h_unit, opts.cn_z_unit, n_features,
                  opts.en_h_unit, opts.en_o_unit)
    if opts.gpu >= 0:
        # Make the specified GPU current, then copy the model to it.
        chainer.backends.cuda.get_device_from_id(opts.gpu).use()
        model.to_gpu()

    optimizer = optimizers.Adam(alpha=0.0001)
    optimizer.setup(model)

    if opts.resume:
        # Restore the model weights saved at the requested epoch.
        model_file = '{}/model_snapshot_epoch_{}'.format(
            opts.out, opts.resume)
        serializers.load_npz(model_file, model)

    updater = training.StandardUpdater(
        train_iter, optimizer, device=opts.gpu,
        loss_func=model.lossFunc(gpu=opts.gpu))
    trainer = training.Trainer(updater, (opts.epoch, 'epoch'), out=opts.out)

    evaluator = extensions.Evaluator(
        valid_iter, model, device=opts.gpu,
        eval_func=model.lossFunc(gpu=opts.gpu))
    trainer.extend(evaluator)
    trainer.extend(extensions.dump_graph('main/loss'))

    # Trainer, model and optimizer are snapshotted on the same schedule.
    snapshot_trigger = (opts.frequency, 'epoch')
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'),
        trigger=snapshot_trigger)
    trainer.extend(
        extensions.snapshot_object(
            model, filename='model_snapshot_epoch_{.updater.epoch}'),
        trigger=snapshot_trigger)
    trainer.extend(
        extensions.snapshot_object(
            optimizer,
            filename='optimizer_snapshot_epoch_{.updater.epoch}'),
        trigger=snapshot_trigger)

    # loss1.png shows training and validation loss; loss2.png training only.
    trainer.extend(
        extensions.PlotReport(['main/loss', 'validation/main/loss'],
                              x_key='epoch', file_name='loss1.png'))
    trainer.extend(
        extensions.PlotReport(['main/loss'], x_key='epoch',
                              file_name='loss2.png'))
    trainer.extend(extensions.LogReport(log_name="log",
                                        trigger=(1, 'epoch')))
    columns = ['epoch', 'main/loss', 'validation/main/loss', 'elapsed_time']
    trainer.extend(extensions.PrintReport(columns))
    trainer.extend(extensions.ProgressBar())

    if opts.resume:
        # Restore the full trainer state saved at the requested epoch.
        trainer_file = '{}/snapshot_epoch-{}'.format(opts.out, opts.resume)
        serializers.load_npz(trainer_file, trainer)

    trainer.run()
def main():
    """Train an MLP classifier on MNIST using Chainer's ``Trainer``.

    Command-line options select the mini-batch size, the number of epochs,
    the GPU ID (negative means CPU), the output directory, an optional
    trainer snapshot to resume from, and the unit count passed to ``MLP``.
    """
    parser = argparse.ArgumentParser(description='Chainer example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# unit: {}'.format(args.unit))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    model = L.Classifier(MLP(args.unit, 10))
    if args.gpu >= 0:
        # Make the specified GPU current.  get_device_from_id() is the
        # non-deprecated replacement for chainer.cuda.get_device().
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist()

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot every ``args.epoch`` epochs, which is the same interval
    # as the stop trigger (not once per epoch).
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
def main():
    """Train a VGG classifier on the datasets returned by ``get_data``.

    Uses MomentumSGD with weight decay, halves the learning rate every 25
    epochs, and writes logs, plots and snapshots to the ``--out`` directory.
    A positive ``--frequency`` sets the trainer-snapshot interval in epochs;
    otherwise a snapshot is taken every 10 epochs.
    """
    parser = argparse.ArgumentParser(description='Chainer example:cfiar-VGG')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--frequency', '-f', type=int, default=-1,
                        help='Frequency of taking a snapshot')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # NOTE(review): the dataset locations are hard-coded absolute paths.
    train = get_data(r"/home/notsuji/chainer_src/origin/mnist_data/Data/train")
    test = get_data(r"/home/notsuji/chainer_src/origin/mnist_data/Data/test")

    # Setup model
    model = L.Classifier(VGG(10))
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # Setup an optimizer
    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu),
                   trigger=(1, 'epoch'))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # --frequency used to be parsed and then ignored.  Honour it when it is
    # positive; the default (-1) keeps the previous 10-epoch interval.
    snapshot_epochs = args.frequency if args.frequency > 0 else 10

    # Take a snapshot for each specified epoch
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
        trigger=(snapshot_epochs, 'epoch'))
    # The bare model is saved every ``args.epoch`` epochs, the same interval
    # as the stop trigger.
    trainer.extend(
        extensions.snapshot_object(model, 'model_epoch_{.updater.epoch}'),
        trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Save two plot images to the result dir
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['main/accuracy', 'validation/main/accuracy'],
                'epoch', file_name='accuracy.png'))

    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
        ]))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
else: train_iter = chainer.iterators.MultiprocessIterator(train, args.batch_size) test_iter = chainer.iterators.MultiprocessIterator(test, args.batch_size, repeat=False) updater = training.updater.StandardUpdater(train_iter, optimizer, device=device) stop_trigger = (args.epochs, 'epoch') trainer = training.Trainer(updater, stop_trigger, out=args.output_data_dir) # Evaluate the model with the test dataset for each epoch trainer.extend(extensions.Evaluator(test_iter, model, device=device)) # Reduce the learning rate by half every 25 epochs. trainer.extend(extensions.ExponentialShift('lr', 0.5), trigger=(25, 'epoch')) # Dump a computational graph from 'loss' variable at the first iteration # The "main" refers to the target link of the "main" optimizer. trainer.extend(extensions.dump_graph('main/loss')) # Write a log of evaluation statistics for each epoch trainer.extend(extensions.LogReport()) if extensions.PlotReport.available(): trainer.extend( extensions.PlotReport(['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png')) trainer.extend( extensions.PlotReport( ['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='accuracy.png')) # Print selected entries of the log to stdout # Here "main" refers to the target link of the "main" optimizer again, and
# trainer trigger_log = (config.train.log_iteration, 'iteration') trigger_snapshot = (config.train.snapshot_iteration, 'iteration') trainer = training.Trainer(updater, out=arguments.output) ext = extensions.Evaluator(test_iter, models, converter, device=config.train.gpu, eval_func=updater.forward) trainer.extend(ext, name='test', trigger=trigger_log) ext = extensions.Evaluator(train_eval_iter, models, converter, device=config.train.gpu, eval_func=updater.forward) trainer.extend(ext, name='train', trigger=trigger_log) trainer.extend(extensions.dump_graph('predictor/loss')) ext = extensions.snapshot_object(predictor, filename='predictor_{.updater.iteration}.npz') trainer.extend(ext, trigger=trigger_snapshot) trainer.extend(extensions.LogReport(trigger=trigger_log)) print(args) save_args(args, args['output']) trainer.run()
def main():
    """Train a VAE on MNIST and save reconstructed and sampled images.

    Builds the encoder, decoder and prior from ``net``, trains
    ``net.AvgELBOLoss`` with Adam through Chainer's ``Trainer``, then writes
    3x3 image grids (train, test, their reconstructions, and samples drawn
    from the prior) into the ``--out`` directory.
    """
    parser = argparse.ArgumentParser(description='Chainer example: VAE')
    parser.add_argument('--initmodel', '-m', default='',
                        help='Initialize the model from given file')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the optimization from snapshot')
    parser.add_argument('--gpu', '-g', default=-1, type=int,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='results',
                        help='Directory to output the result')
    parser.add_argument('--epoch', '-e', default=100, type=int,
                        help='number of epochs to learn')
    parser.add_argument('--dim-z', '-z', default=20, type=int,
                        help='dimention of encoded vector')
    parser.add_argument('--dim-h', default=500, type=int,
                        help='dimention of hidden layer')
    parser.add_argument('--beta', default=1.0, type=float,
                        help='Regularization coefficient for '
                             'the second term of ELBO bound')
    parser.add_argument('--k', '-k', default=1, type=int,
                        help='Number of Monte Carlo samples used in '
                             'encoded vector')
    parser.add_argument('--binary', action='store_true',
                        help='Use binarized MNIST')
    parser.add_argument('--batch-size', '-b', type=int, default=100,
                        help='learning minibatch size')
    parser.add_argument('--test', action='store_true',
                        help='Use tiny datasets for quick tests')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# dim z: {}'.format(args.dim_z))
    print('# Minibatch-size: {}'.format(args.batch_size))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Prepare VAE model, defined in net.py
    encoder = net.make_encoder(784, args.dim_z, args.dim_h)
    decoder = net.make_decoder(784, args.dim_z, args.dim_h,
                               binary_check=args.binary)
    prior = net.make_prior(args.dim_z)
    avg_elbo_loss = net.AvgELBOLoss(encoder, decoder, prior,
                                    beta=args.beta, k=args.k)
    if args.gpu >= 0:
        avg_elbo_loss.to_gpu(args.gpu)

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(avg_elbo_loss)

    # Initialize
    if args.initmodel:
        chainer.serializers.load_npz(args.initmodel, avg_elbo_loss)

    # Load the MNIST dataset
    train, test = chainer.datasets.get_mnist(withlabel=False)

    if args.binary:
        # Binarize dataset
        train = (train >= 0.5).astype(np.float32)
        test = (test >= 0.5).astype(np.float32)

    if args.test:
        # Keep only the first 100 examples of each split for a quick run.
        train, _ = chainer.datasets.split_dataset(train, 100)
        test, _ = chainer.datasets.split_dataset(test, 100)

    train_iter = chainer.iterators.SerialIterator(train, args.batch_size)
    test_iter = chainer.iterators.SerialIterator(test, args.batch_size,
                                                 repeat=False, shuffle=False)

    # Set up an updater. StandardUpdater can explicitly specify a loss function
    # used in the training with 'loss_func' option
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu, loss_func=avg_elbo_loss)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(extensions.Evaluator(
        test_iter, avg_elbo_loss, device=args.gpu))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/reconstr', 'main/kl_penalty', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()

    # Visualize the results
    def save_images(x, filename):
        """Draw up to nine 28x28 images from ``x`` on a 3x3 grid and save it."""
        import matplotlib.pyplot as plt
        fig, ax = plt.subplots(3, 3, figsize=(9, 9), dpi=100)
        for ai, xi in zip(ax.flatten(), x):
            ai.imshow(xi.reshape(28, 28))
        fig.savefig(filename)
        # pyplot keeps every figure alive until it is closed; release it so
        # the five grids saved below do not accumulate.
        plt.close(fig)

    # The visualisation below runs on NumPy arrays.
    avg_elbo_loss.to_cpu()

    train_ind = [1, 3, 5, 10, 2, 0, 13, 15, 17]
    x = chainer.Variable(np.asarray(train[train_ind]))
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        x1 = decoder(encoder(x).mean, inference=True).mean
    save_images(x.array, os.path.join(args.out, 'train'))
    save_images(x1.array, os.path.join(args.out, 'train_reconstructed'))

    test_ind = [3, 2, 1, 18, 4, 8, 11, 17, 61]
    x = chainer.Variable(np.asarray(test[test_ind]))
    with chainer.using_config('train', False), chainer.no_backprop_mode():
        x1 = decoder(encoder(x).mean, inference=True).mean
    save_images(x.array, os.path.join(args.out, 'test'))
    save_images(x1.array, os.path.join(args.out, 'test_reconstructed'))

    # draw images from randomly sampled z
    z = prior().sample(9)
    x = decoder(z, inference=True).mean
    save_images(x.array, os.path.join(args.out, 'sampled'))
# ニューラルネットワークの作成 ## 2クラス分類問題のため、損失関数にソフトマックス交差エントロピーを使用 model = L.Classifier(Model(), lossfun=F.softmax_cross_entropy) # 学習開始 run_training( model, train, epochs, chainer.optimizers.Adam(), # 最適化関数=Adam batchsize=batchsize, validation=test, gpu_device=0, # GPU使用 extensions=[ extensions.LogReport(), # ログ表示 extensions.PrintReport([ 'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy', 'validation/main/accuracy', 'elapsed_time' ]), # 計算状態の表示 extensions.dump_graph('main/loss'), # ニューラルネットワークの構造 extensions.PlotReport(['main/loss', 'validation/main/loss'], 'epoch', file_name='01.loss.png'), # 誤差のグラフ extensions.PlotReport(['main/accuracy', 'validation/main/accuracy'], 'epoch', file_name='01.accuracy.png'), # 精度のグラフ ]) # ニューラルネットワーク構造(dump_graph('main/loss'))の可視化 ## > conda install -c anaconda graphviz ## > dot -Tpng result/cg.dot -o result/cg.png
def main():
    """Train a convnet chosen by ``--arch`` on an ILSVRC2012-style dataset.

    Reads image-label list files for training and validation, loads the mean
    file, and runs Chainer's ``Trainer`` with MomentumSGD.  With ``--test``
    the evaluation and logging intervals drop to one iteration.
    """
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
        'resnext50': resnet50.ResNeXt50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    add = parser.add_argument
    add('train', help='Path to training image-label list file')
    add('val', help='Path to validation image-label list file')
    add('--arch', '-a', choices=archs.keys(), default='nin',
        help='Convnet architecture')
    add('--batchsize', '-B', type=int, default=32,
        help='Learning minibatch size')
    add('--epoch', '-E', type=int, default=10,
        help='Number of epochs to train')
    add('--gpu', '-g', type=int, default=-1,
        help='GPU ID (negative value indicates CPU')
    add('--initmodel', help='Initialize the model from given file')
    add('--loaderjob', '-j', type=int,
        help='Number of parallel data loading processes')
    add('--mean', '-m', default='mean.npy',
        help='Mean file (computed by compute_mean.py)')
    add('--resume', '-r', default='',
        help='Initialize the trainer from given file')
    add('--out', '-o', default='result', help='Output directory')
    add('--root', '-R', default='.',
        help='Root directory path of image files')
    add('--val_batchsize', '-b', type=int, default=250,
        help='Validation minibatch size')
    add('--test', action='store_true')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Instantiate the selected architecture, optionally from saved weights.
    arch_cls = archs[args.arch]
    model = arch_cls()
    if args.initmodel:
        print('Load model from {}'.format(args.initmodel))
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        gpu_device = chainer.backends.cuda.get_device_from_id(args.gpu)
        gpu_device.use()  # Make the GPU current
        model.to_gpu()

    # Mean image plus the two preprocessed datasets.
    mean = np.load(args.mean)
    train = PreprocessedDataset(args.train, args.root, mean, model.insize)
    val = PreprocessedDataset(args.val, args.root, mean, model.insize, False)

    # Images are loaded by worker subprocesses that run in parallel with
    # training and validation.
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    stop_trigger = (args.epoch, 'epoch')
    trainer = training.Trainer(updater, stop_trigger, args.out)

    # --test collapses both intervals to a single iteration.
    if args.test:
        val_every, log_every = 1, 1
    else:
        val_every, log_every = 100000, 1000
    val_interval = (val_every, 'iteration')
    log_interval = (log_every, 'iteration')

    evaluator = extensions.Evaluator(val_iter, model, device=args.gpu)
    trainer.extend(evaluator, trigger=val_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    model_snapshot = extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}')
    trainer.extend(model_snapshot, trigger=val_interval)

    # LogReport must receive the interval itself: it decides when a log
    # entry is emitted, not when observations are read.
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    report_keys = [
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]
    trainer.extend(extensions.PrintReport(report_keys), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train DenseNet on the CIFAR variant selected by ``--dataset``.

    Uses Nesterov accelerated gradient with weight decay, and installs a
    ``StepShift`` extension that sets the learning rate to 0.01 at 50% and
    0.001 at 75% of the total number of training iterations.
    """
    # define options
    parser = argparse.ArgumentParser(
        description='Training script of DenseNet on CIFAR-10 dataset')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of epochs to train')
    parser.add_argument('--gpu', '-g', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Validation minibatch size')
    parser.add_argument('--numlayers', '-L', type=int, default=40,
                        help='Number of layers')
    parser.add_argument('--growth', '-G', type=int, default=12,
                        help='Growth rate parameter')
    parser.add_argument('--dropout', '-D', type=float, default=0.2,
                        help='Dropout ratio')
    parser.add_argument('--dataset', type=str, default='C10',
                        choices=('C10', 'C10+', 'C100', 'C100+'),
                        help='Dataset used for training (Default is C10)')
    args = parser.parse_args()

    # load dataset (argparse ``choices`` guarantees one branch matches)
    if args.dataset == 'C10':
        train, test = dataset.get_C10()
    elif args.dataset == 'C10+':
        train, test = dataset.get_C10_plus()
    elif args.dataset == 'C100':
        train, test = dataset.get_C100()
    elif args.dataset == 'C100+':
        train, test = dataset.get_C100_plus()

    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)
    test_iter = chainer.iterators.MultiprocessIterator(test, args.batchsize,
                                                       repeat=False,
                                                       shuffle=False)

    # The class count was hard-coded to 10, which does not match the
    # C100/C100+ datasets.
    # NOTE(review): this assumes the last positional argument of DenseNet is
    # the number of output classes -- confirm against the DenseNet definition.
    n_class = 100 if args.dataset.startswith('C100') else 10

    # setup model
    model = L.Classifier(
        DenseNet(args.numlayers, args.growth, 16, args.dropout, n_class))
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)
    if args.gpu >= 0:
        # get_device_from_id() is the non-deprecated replacement for
        # chainer.cuda.get_device().
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    # setup optimizer
    optimizer = chainer.optimizers.NesterovAG(lr=0.1, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(1e-4))

    # setup trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    trainer.extend(Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(
        extensions.snapshot_object(model, 'model_{.updater.epoch}.npz'))
    trainer.extend(extensions.LogReport())
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy'
        ]))
    trainer.extend(extensions.ProgressBar())

    # divide lr by 10 at 0.5, 0.75 fraction of total number of training epochs
    iter_per_epoch = math.ceil(len(train) / args.batchsize)
    n_iter1 = int(args.epoch * 0.5 * iter_per_epoch)
    n_iter2 = int(args.epoch * 0.75 * iter_per_epoch)
    shifts = [(n_iter1, 0.01), (n_iter2, 0.001)]
    trainer.extend(StepShift('lr', shifts, optimizer))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # start training
    trainer.run()
def main():
    """Train a VGG classifier on CIFAR-10 or CIFAR-100.

    ``--dataset`` selects the data and the number of class labels (10 or
    100).  Training uses MomentumSGD with weight decay and halves the
    learning rate every 25 epochs.

    Raises:
        RuntimeError: if ``--dataset`` is neither ``cifar10`` nor
            ``cifar100``.
    """
    parser = argparse.ArgumentParser(description='Chainer CIFAR example:')
    parser.add_argument('--dataset', '-d', default='cifar10',
                        help='The dataset to use: cifar10 or cifar100')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', '-l', type=float, default=0.05,
                        help='Learning rate for SGD')
    parser.add_argument('--epoch', '-e', type=int, default=300,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', type=int, default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    args = parser.parse_args()

    print('GPU: {}'.format(args.gpu))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Set up a neural network to train.
    # Classifier reports softmax cross entropy loss and accuracy at every
    # iteration, which will be used by the PrintReport extension below.
    if args.dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = get_cifar10()
    elif args.dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = get_cifar100()
    else:
        raise RuntimeError('Invalid dataset choice.')
    model = L.Classifier(models.VGG.VGG(class_labels))
    if args.gpu >= 0:
        # Make the specified GPU current.  get_device_from_id() is the
        # non-deprecated replacement for chainer.cuda.get_device().
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(args.learnrate)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(5e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Evaluate the model with the test dataset for each epoch
    trainer.extend(TestModeEvaluator(test_iter, model, device=args.gpu))

    # Reduce the learning rate by half every 25 epochs.
    trainer.extend(extensions.ExponentialShift('lr', 0.5),
                   trigger=(25, 'epoch'))

    # Dump a computational graph from 'loss' variable at the first iteration
    # The "main" refers to the target link of the "main" optimizer.
    trainer.extend(extensions.dump_graph('main/loss'))

    # Take a snapshot every ``args.epoch`` epochs, which is the same interval
    # as the stop trigger (not once per epoch).
    trainer.extend(extensions.snapshot(), trigger=(args.epoch, 'epoch'))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport())

    # Print selected entries of the log to stdout
    # Here "main" refers to the target link of the "main" optimizer again, and
    # "validation" refers to the default name of the Evaluator extension.
    # Entries other than 'epoch' are reported by the Classifier link, called by
    # either the updater or the evaluator.
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # Run the training
    trainer.run()
multibox_encoder) train_iter = chainer.iterators.MultiprocessIterator( train, args.batchsize, n_processes=args.loaderjob) optimizer = chainer.optimizers.MomentumSGD(lr=0.001) optimizer.setup(model) optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005)) updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu) trainer = training.Trainer(updater, (120000, 'iteration'), args.out) snapshot_interval = 1000, 'iteration' log_interval = 10, 'iteration' trainer.extend(extensions.dump_graph('main/loss')) trainer.extend(extensions.snapshot(), trigger=snapshot_interval) trainer.extend(extensions.snapshot_object( model, 'model_iter_{.updater.iteration}'), trigger=snapshot_interval) trainer.extend(extensions.LogReport(trigger=log_interval)) trainer.extend(extensions.observe_lr(), trigger=log_interval) trainer.extend(extensions.PrintReport( ['epoch', 'iteration', 'main/loss', 'main/loc', 'main/conf', 'lr']), trigger=log_interval) trainer.extend(extensions.ProgressBar(update_interval=10)) if args.resume: chainer.serializers.load_npz(args.resume, trainer) trainer.run()
def main():
    """Train a convnet chosen by ``--arch``, optionally loading data with DALI.

    The target device comes from ``parse_device(args)``.  With ``--dali``
    the DALI pipelines and converter are used; otherwise images are read
    through ``PreprocessedDataset`` and ``MultiprocessIterator``.

    Raises:
        RuntimeError: if ``--dali`` is given but ``dali_util`` reports that
            DALI is not available.
    """
    archs = {
        'alex': alex.Alex,
        'alex_fp16': alex.AlexFp16,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'googlenetbn_fp16': googlenetbn.GoogLeNetBNFp16,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
        'resnext50': resnext50.ResNeXt50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    add = parser.add_argument
    add('train', help='Path to training image-label list file')
    add('val', help='Path to validation image-label list file')
    add('--arch', '-a', choices=archs.keys(), default='nin',
        help='Convnet architecture')
    add('--batchsize', '-B', type=int, default=32,
        help='Learning minibatch size')
    add('--epoch', '-E', type=int, default=10,
        help='Number of epochs to train')
    add('--device', '-d', type=str, default='-1',
        help='Device specifier. Either ChainerX device '
             'specifier or an integer. If non-negative integer, '
             'CuPy arrays with specified device id are used. If '
             'negative integer, NumPy arrays are used')
    add('--initmodel', help='Initialize the model from given file')
    add('--loaderjob', '-j', type=int,
        help='Number of parallel data loading processes')
    add('--mean', '-m', default='mean.npy',
        help='Mean file (computed by compute_mean.py)')
    add('--resume', '-r', default='',
        help='Initialize the trainer from given file')
    add('--out', '-o', default='result', help='Output directory')
    add('--root', '-R', default='.',
        help='Root directory path of image files')
    add('--val_batchsize', '-b', type=int, default=250,
        help='Validation minibatch size')
    add('--test', action='store_true')
    parser.set_defaults(test=False)
    add('--dali', action='store_true')
    parser.set_defaults(dali=False)
    deprecated = parser.add_argument_group('deprecated arguments')
    deprecated.add_argument('--gpu', '-g', type=int, nargs='?', const=0,
                            help='GPU ID (negative value indicates CPU)')
    args = parser.parse_args()

    device = parse_device(args)

    print('Device: {}'.format(device))
    print('# Minibatch-size: {}'.format(args.batchsize))
    print('# epoch: {}'.format(args.epoch))
    print('')

    # Instantiate the selected architecture, optionally from saved weights.
    arch_cls = archs[args.arch]
    model = arch_cls()
    if args.initmodel:
        print('Load model from {}'.format(args.initmodel))
        chainer.serializers.load_npz(args.initmodel, model)
    model.to_device(device)
    device.use()

    # Mean file
    mean = np.load(args.mean)

    if args.dali:
        if not dali_util._dali_available:
            raise RuntimeError('DALI seems not available on your system.')
        # Use one thread when --loaderjob is missing or not positive.
        jobs = args.loaderjob
        num_threads = jobs if (jobs is not None and jobs > 0) else 1
        ch_mean = list(np.average(mean, axis=(1, 2)))
        ch_std = [255.0, 255.0, 255.0]
        # DALI pipelines.
        # NOTE(review): these receive args.gpu (the deprecated option), which
        # is None unless --gpu is passed -- confirm this is intended when the
        # device is selected with --device.
        train_pipe = dali_util.DaliPipelineTrain(
            args.train, args.root, model.insize, args.batchsize,
            num_threads, args.gpu, True, mean=ch_mean, std=ch_std)
        val_pipe = dali_util.DaliPipelineVal(
            args.val, args.root, model.insize, args.val_batchsize,
            num_threads, args.gpu, False, mean=ch_mean, std=ch_std)
        train_iter = chainer.iterators.DaliIterator(train_pipe)
        val_iter = chainer.iterators.DaliIterator(val_pipe, repeat=False)
        converter = dali_util.DaliConverter(mean=mean, crop_size=model.insize)
    else:
        # Dataset files
        train = PreprocessedDataset(args.train, args.root, mean, model.insize)
        val = PreprocessedDataset(args.val, args.root, mean, model.insize,
                                  False)
        # Images are loaded by worker subprocesses that run in parallel with
        # training and validation.
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=args.loaderjob)
        val_iter = chainer.iterators.MultiprocessIterator(
            val, args.val_batchsize, repeat=False,
            n_processes=args.loaderjob)
        converter = dataset.concat_examples

    # Optimizer
    optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
    optimizer.setup(model)

    # Trainer
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, converter=converter, device=device)
    stop_trigger = (args.epoch, 'epoch')
    trainer = training.Trainer(updater, stop_trigger, args.out)

    # --test collapses both intervals to a single iteration.
    if args.test:
        val_every, log_every = 1, 1
    else:
        val_every, log_every = 100000, 1000
    val_interval = (val_every, 'iteration')
    log_interval = (log_every, 'iteration')

    evaluator = extensions.Evaluator(val_iter, model, converter=converter,
                                     device=device)
    trainer.extend(evaluator, trigger=val_interval)
    # TODO(sonots): Temporarily disabled for chainerx. Fix it.
    on_chainerx = (chainerx.is_available()
                   and isinstance(device, chainerx.Device))
    if not on_chainerx:
        trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=val_interval)
    model_snapshot = extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}')
    trainer.extend(model_snapshot, trigger=val_interval)

    # LogReport must receive the interval itself: it decides when a log
    # entry is emitted, not when observations are read.
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    report_keys = [
        'epoch', 'iteration', 'main/loss', 'validation/main/loss',
        'main/accuracy', 'validation/main/accuracy', 'lr'
    ]
    trainer.extend(extensions.PrintReport(report_keys), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
def main():
    """Train an MNIST MLP with ChainerMN with automatic checkpoints enabled.

    Parses the command-line options, creates a ChainerMN communicator (the
    requested one when ``--gpu`` is given, ``naive`` otherwise), scatters the
    MNIST dataset loaded on rank 0 to all workers, and runs a ``Trainer``
    that registers a multi-node checkpointer (named by ``--run-id``) every
    1000 iterations.

    Exits with status -1 when ``--gpu`` is combined with the ``naive``
    communicator.
    """
    # Local import: ``sys.exit`` is always available, whereas the bare
    # ``exit`` helper only exists when the ``site`` module has been loaded.
    import sys

    parser = argparse.ArgumentParser(description='''\
ChainerMN example: MNIST with automatic checkpoints enabled''')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--communicator', type=str,
                        default='hierarchical', help='Type of communicator')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    parser.add_argument('--run-id', type=str, default='train-mnist-example',
                        help='ID of the task name')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    # ``communicator_name`` tracks the communicator that is actually created,
    # which differs from ``args.communicator`` on the CPU fallback path.
    if args.gpu:
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            sys.exit(-1)
        communicator_name = args.communicator
        comm = chainermn.create_communicator(communicator_name)
        device = comm.intra_rank
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        communicator_name = 'naive'
        comm = chainermn.create_communicator(communicator_name)
        device = -1

    # Only one worker prints the run summary to avoid repeated output.
    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(communicator_name))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = L.Classifier(MLP(args.unit, 10))
    if device >= 0:
        # ``get_device_from_id`` replaces the deprecated ``get_device``.
        chainer.backends.cuda.get_device_from_id(device).use()
        model.to_gpu()

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Enable checkpointer and recover from checkpoint if any checkpoint exists
    checkpointer = create_multi_node_checkpointer(name=args.run_id, comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    print("Rank", comm.rank, ": (Re)Starting from (epoch, iter) =",
          (trainer.updater.epoch, trainer.updater.iteration))
    trainer.extend(checkpointer, trigger=(1000, 'iteration'))

    # Create a multi node evaluator from a standard Chainer evaluator.
    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(extensions.PrintReport([
            'epoch', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'elapsed_time'
        ]))
        trainer.extend(extensions.ProgressBar())

    trainer.run()