Example 1
import chainer
from chainer.training import Trainer, triggers


def schedule_optimizer_value(
    epoch_list, value_list, optimizer_name="main", attr_name="lr"
):
    """Set optimizer's hyperparameter according to value_list,
    scheduled on epoch_list.

    Example usage:
    trainer.extend(schedule_optimizer_value([2, 4, 7], [0.008, 0.006, 0.002]))
    """
    if isinstance(epoch_list, list):
        assert len(epoch_list) == len(value_list)
    else:
        assert isinstance(epoch_list, (int, float))
        assert isinstance(value_list, (int, float))
        epoch_list = [epoch_list]
        value_list = [value_list]

    trigger = triggers.ManualScheduleTrigger(epoch_list, "epoch")
    count = 0

    @chainer.training.extension.make_extension(trigger=trigger)
    def set_value(trainer: Trainer):
        nonlocal count
        optimizer = trainer.updater.get_optimizer(optimizer_name)
        setattr(optimizer, attr_name, value_list[count])
        count += 1

    return set_value
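A minimal usage sketch for the helper above, assuming a configured `trainer` whose updater holds an optimizer registered under the default name "main" (the trainer setup itself is not shown):

# Drop the learning rate to the listed values at epochs 2, 4 and 7.
trainer.extend(schedule_optimizer_value([2, 4, 7], [0.008, 0.006, 0.002]))
# Scalars hit the non-list branch; other hyperparameters such as momentum
# can be scheduled via attr_name (assumes a MomentumSGD-like optimizer).
trainer.extend(schedule_optimizer_value(5, 0.8, attr_name="momentum"))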
Example 2
def trainer_extend(trainer, net, evaluator, small_lr_layers, lr_decay_rate,
                   lr_decay_epoch, epoch_or_iter, save_trainer_interval):
    def slow_drop_lr(trainer):
        if small_lr_layers:
            for layer_name in small_lr_layers:
                layer = getattr(net.predictor, layer_name)
                layer.W.update_rule.hyperparam.lr *= lr_decay_rate
                layer.b.update_rule.hyperparam.lr *= lr_decay_rate

    # Learning rate
    trainer.extend(slow_drop_lr,
                   trigger=triggers.ManualScheduleTrigger(
                       lr_decay_epoch, epoch_or_iter))
    trainer.extend(extensions.ExponentialShift('lr', lr_decay_rate),
                   trigger=triggers.ManualScheduleTrigger(
                       lr_decay_epoch, epoch_or_iter))

    # Observe training
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.observe_lr(), trigger=(1, epoch_or_iter))
    trainer.extend(evaluator, name='val')

    print_report = [
        "epoch", "main/loss", "val/main/miou", "val/main/pixel_accuracy",
        "val/main/mean_class_accuracy", "lr", "elapsed_time"
    ]

    trainer.extend(extensions.PrintReport(print_report))

    # save results of training
    trainer.extend(
        extensions.PlotReport(['main/loss'],
                              x_key=epoch_or_iter,
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport([
            'val/main/miou', 'val/main/pixel_accuracy',
            'val/main/mean_class_accuracy'
        ],
                              x_key=epoch_or_iter,
                              file_name='accuracy.png'))
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'),
        trigger=(save_trainer_interval, epoch_or_iter))
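A hedged usage sketch for `trainer_extend`; the trainer, net and evaluator objects, and the 'fc8' layer name, are placeholders for the caller's own setup:

trainer_extend(trainer, net, evaluator,
               small_lr_layers=['fc8'],
               lr_decay_rate=0.1,
               lr_decay_epoch=[30, 60],
               epoch_or_iter='epoch',
               save_trainer_interval=10)

Note that `slow_drop_lr` and the global ExponentialShift fire on the same schedule: layers listed in small_lr_layers carry their own per-parameter rates, which Chainer's update rules prefer over the optimizer-wide 'lr', so those layers are decayed only by slow_drop_lr.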
Example 3
def test_chainer_pruning_extension_trigger():
    # type: () -> None

    study = optuna.create_study()
    trial = create_running_trial(study, 1.0)

    extension = ChainerPruningExtension(trial, 'main/loss', (1, 'epoch'))
    assert isinstance(extension.pruner_trigger, triggers.IntervalTrigger)
    extension = ChainerPruningExtension(trial, 'main/loss', triggers.IntervalTrigger(1, 'epoch'))
    assert isinstance(extension.pruner_trigger, triggers.IntervalTrigger)
    extension = ChainerPruningExtension(trial, 'main/loss',
                                        triggers.ManualScheduleTrigger(1, 'epoch'))
    assert isinstance(extension.pruner_trigger, triggers.ManualScheduleTrigger)

    with pytest.raises(TypeError):
        ChainerPruningExtension(trial, 'main/loss', triggers.TimeTrigger(1.))
Example 4
def test_chainer_pruning_extension_trigger() -> None:

    study = optuna.create_study()
    trial = study.ask()

    extension = ChainerPruningExtension(trial, "main/loss", (1, "epoch"))
    assert isinstance(extension._pruner_trigger, triggers.IntervalTrigger)
    extension = ChainerPruningExtension(
        trial, "main/loss", triggers.IntervalTrigger(1, "epoch")  # type: ignore
    )
    assert isinstance(extension._pruner_trigger, triggers.IntervalTrigger)
    extension = ChainerPruningExtension(
        trial, "main/loss", triggers.ManualScheduleTrigger(1, "epoch")  # type: ignore
    )
    assert isinstance(extension._pruner_trigger, triggers.ManualScheduleTrigger)

    with pytest.raises(TypeError):
        ChainerPruningExtension(trial, "main/loss", triggers.TimeTrigger(1.0))  # type: ignore
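The two tests above only exercise trigger validation; a sketch of how the extension is wired into a study in practice follows. The `build_model_and_iterators` helper is hypothetical; everything else is public Chainer/Optuna API:

import chainer
from chainer.training import extensions
import optuna
from optuna.integration import ChainerPruningExtension


def objective(trial):
    # Hypothetical helper that builds the model and iterators from the trial.
    model, train_iter, val_iter = build_model_and_iterators(trial)
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)
    updater = chainer.training.StandardUpdater(train_iter, optimizer)
    trainer = chainer.training.Trainer(updater, (10, 'epoch'))
    trainer.extend(extensions.Evaluator(val_iter, model))
    trainer.extend(extensions.LogReport())
    # Report 'validation/main/loss' once per epoch so Optuna can prune.
    trainer.extend(
        ChainerPruningExtension(trial, 'validation/main/loss', (1, 'epoch')))
    trainer.run()
    return trainer.get_extension('LogReport').log[-1]['validation/main/loss']


study = optuna.create_study(pruner=optuna.pruners.MedianPruner())
study.optimize(objective, n_trials=20)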
Example 5
def main():
    parser = argparse.ArgumentParser(description='noisy CIFAR-10 training:')
    parser.add_argument('--batchsize', type=int, default=128,
                        help='Number of images in each mini-batch')
    parser.add_argument('--learnrate', type=float, default=0.2,
                        help='Learning rate for SGD')
    parser.add_argument('--weight', type=float, default=1e-4,
                        help='Weight decay parameter')
    parser.add_argument('--epoch', type=int, default=120,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', type=int, default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out', default='result',
                        help='Directory to output the result')
    parser.add_argument('--mean', default='mean.npy',
                        help='Mean image file')
    parser.add_argument('--label', default='result',
                        help='Directory where the labels obtained in the first step exist')
    parser.add_argument('--seed', type=int, default=0,
                        help='Random Seed')
    args = parser.parse_args()

    np.random.seed(args.seed)

    train_val_d, test_d = get_cifar10()
    train_d, val_d = train_val_split(train_val_d, int(len(train_val_d)*0.9))

    if os.path.exists(args.mean):
        mean = np.load(args.mean)
    else:
        mean = np.mean([x for x, _ in train_d], axis=0)
        np.save(args.mean, mean)

    model = TrainChain()
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    updated_labels = np.load('{}/labels.npy'.format(args.label))
    updated_soft_labels = np.load('{}/soft_labels.npy'.format(args.label))
    train = TrainData(train_d, mean, updated_labels, updated_soft_labels)
    val = ValData(val_d, mean)
    test = ValData(test_d, mean)

    optimizer = chainer.optimizers.MomentumSGD(lr=args.learnrate, momentum=0.9)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(args.weight))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val_iter = chainer.iterators.SerialIterator(val, args.batchsize, repeat=False, shuffle=False)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trigger_epochs = [int(args.epoch / 3), int(args.epoch * 2 / 3)]
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=args.learnrate),
        trigger=triggers.ManualScheduleTrigger(trigger_epochs, 'epoch'))

    trainer.extend(extensions.Evaluator(val_iter, model, device=args.gpu))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
        trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())

    trainer.run()

    test_evaluator = extensions.Evaluator(test_iter, model, device=args.gpu)
    results = test_evaluator()
    print('Test accuracy:', results['main/accuracy'])
Example 6
def main():
    parser = argparse.ArgumentParser(description='Chainer-PSMNet')
    parser.add_argument('--maxdisp', type=int, default=192,
                        help='maximum disparity')
    parser.add_argument('--datapath',
                        default='/home/<username>/datasets/KITTI_stereo/training/',
                        help='datapath')
    parser.add_argument('--epochs', type=int, default=10,
                        help='number of epochs to train')
    parser.add_argument('--batchsize', type=int, default=12)
    parser.add_argument('--gpu0', '-g', type=int, default=-1,
                        help='First GPU ID (negative value indicates CPU)')
    parser.add_argument('--gpu1', '-G', type=int, default=-1,
                        help='Second GPU ID (negative value indicates CPU)')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--out', default='result_fine/basic')
    parser.add_argument('--seed', type=int, default=1, metavar='S',
                        help='random seed (default: 1)')
    parser.add_argument('--model', default='/<modelpath>/model_iter_xxxxx.npz',
                        help='path to the pretrained model snapshot')
    args = parser.parse_args()

    print('# GPU: {} ({})'.format(args.gpu0, args.gpu1))
    print('# datapath: {}'.format(args.datapath))
    print('# epoch: {}'.format(args.epochs))
    print('# plot: {}'.format(extensions.PlotReport.available()))
    print('')

    # Triggers
    log_trigger = (3, 'iteration')
    validation_trigger = (1, 'epoch')
    lr_change_trigger = (3, 'iteration')  # (200, 'epoch')
    snapshot_trigger = (1, 'epoch')
    end_trigger = (300, 'epoch')

    # Dataset
    # dataloader
    dataname_list = lt.dataloader(args.datapath)
    all_left_img, all_right_img, all_left_disp, test_left_img, test_right_img, test_left_disp = dataname_list
    # transform
    train = DA.myImageFolder(all_left_img, all_right_img, all_left_disp, True)
    test = DA.myImageFolder(
        test_left_img, test_right_img, test_left_disp, False)

    # Iterator
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)
    test_iter = chainer.iterators.MultiprocessIterator(test, args.batchsize,
                                                       shuffle=False, repeat=False)

    # Model
    model = basic(args.maxdisp, args.gpu0, args.gpu1,
                  training=True, train_type="kitti")
    if args.gpu0 >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu0).use()
        # model.to_gpu()  # Copy the model to the GPU

    # load model
    serializers.load_npz(args.model, model)

    # Optimizer
    optimizer = chainer.optimizers.Adam(alpha=0.001, beta1=0.9, beta2=0.999)
    optimizer.setup(model)
    # optimizer.add_hook(chainer.optimizer.WeightDecay(rate=0.0005))

    # Updater
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu0)

    # Trainer
    trainer = training.Trainer(updater, end_trigger, args.out)

    trainer.extend(extensions.LogReport(trigger=log_trigger))
    trainer.extend(extensions.observe_lr(), trigger=log_trigger)
    trainer.extend(extensions.dump_graph('main/loss'))
    # trainer.extend(TestModeEvaluator(test_iter, model, device=args.gpu0),
    #               trigger=log_trigger)
    trainer.extend(
        extensions.ExponentialShift('alpha', 0.1),
        trigger=triggers.ManualScheduleTrigger([200], 'epoch'))


    # plot loss
    if extensions.PlotReport.available():
        trainer.extend(extensions.PlotReport(
            ['main/loss'], x_key='iteration',
            file_name='loss.png'))

    # print progression
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'elapsed_time',
         'lr', 'main/loss', 'validation/main/loss']),
        trigger=log_trigger)
    trainer.extend(extensions.ProgressBar(update_interval=3))

    # save model parameter
    trainer.extend(extensions.snapshot(), trigger=snapshot_trigger)
    trainer.extend(
        extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}.npz'),
        trigger=snapshot_trigger)
    if args.resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(args.resume, trainer)

    # tensorboard Evaluator
    # [code] https://qiita.com/katotetsuro/items/6f3d3ebd43a6dee6d2b8
    #trainer.extend(TensorBoardReport(args.out), trigger=log_interval)

    trainer.run()
Example 7
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--test-batchsize', type=int, default=16)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*', default=[80000, 100000])
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    device = comm.intra_rank

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        ('img', 'mb_loc', 'mb_label'),
        Transform(model.coder, model.insize, model.mean))

    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # http://chainermn.readthedocs.io/en/latest/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize // comm.size, n_processes=2)

    if comm.rank == 0:
        test = VOCBboxDataset(year='2007',
                              split='test',
                              use_difficult=True,
                              return_difficult=True)
        test_iter = chainer.iterators.SerialIterator(test,
                                                     args.test_batchsize,
                                                     repeat=False,
                                                     shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=device)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step, 'iteration'))

    if comm.rank == 0:
        trainer.extend(DetectionVOCEvaluator(test_iter,
                                             model,
                                             use_07_metric=True,
                                             label_names=voc_bbox_label_names),
                       trigger=triggers.ManualScheduleTrigger(
                           args.step + [args.iteration], 'iteration'))

        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map'
        ]),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(),
                       trigger=triggers.ManualScheduleTrigger(
                           args.step + [args.iteration], 'iteration'))
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
                       trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
Example 8
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    device = comm.intra_rank

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(epic_kitchens_bbox_category_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(epic_kitchens_bbox_category_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train = EpicKitchensBboxDataset(year='2018', split='train')
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    train = TransformDataset(train, ('img', 'mb_loc', 'mb_label'),
                             Transform(model.coder, model.insize, model.mean))

    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    # http://chainermn.readthedocs.io/en/latest/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(train,
                                                        args.batchsize,
                                                        n_processes=2)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=device)
    trainer = training.Trainer(updater, (18, 'epoch'), args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr),
                   trigger=triggers.ManualScheduleTrigger([12, 15], 'epoch'))

    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf'
        ]),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=1))

        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}.npz'),
                       trigger=(1, 'epoch'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
Example 9
    else:
        updater = MultiprocessParallelUpdater(train_iters, optimizer,
                                              devices=devices)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'),
                               out=output_dir)

    if args.cosine:
        trainer.extend(
            CosineAnnealing('lr', int(args.epoch),
                            len(train) / args.batchsize,
                            eta_min=args.eta_min,
                            init=args.lr))
    else:
        trainer.extend(
            extensions.ExponentialShift('lr', 0.1, init=args.lr),
            trigger=triggers.ManualScheduleTrigger(
                [int(args.epoch * 0.50), int(args.epoch * 0.75)], 'epoch'))

    test_interval = 1, 'epoch'
    snapshot_interval = 10, 'epoch'
    log_interval = 100, 'iteration'

    trainer.extend(extensions.Evaluator(test_iter, model,
                   device=args.gpus[0]), trigger=test_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(
        filename='snapshot_epoch_{.updater.epoch}'), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_epoch_{.updater.epoch}'), trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
Example 10
                                              optimizer,
                                              devices=devices)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=output_dir)

    mst_epochs = [30, 60, 90]

    if args.cosine:
        trainer.extend(
            CosineAnnealing('lr',
                            int(args.epoch),
                            len(train) / args.batchsize,
                            eta_min=args.eta_min,
                            init=args.lr))
    else:
        trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr),
                       trigger=triggers.ManualScheduleTrigger(
                           mst_epochs, 'epoch'))

    test_interval = 1, 'epoch'
    snapshot_interval = 10, 'epoch'
    log_interval = 10, 'iteration'

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpus[0]),
                   trigger=test_interval)
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(
        extensions.snapshot(filename='snapshot_epoch_{.updater.epoch}'),
        trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(model,
                                              'model_epoch_{.updater.epoch}'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
Example 11
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', type=int, default=1)
    parser.add_argument('--lr', type=float, default=1e-3)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    comm = chainermn.create_communicator()
    device = comm.intra_rank

    faster_rcnn = FasterRCNNVGG16(
        n_fg_class=len(epic_kitchens_bbox_label_names),
        pretrained_model='imagenet')

    faster_rcnn.use_preset('evaluate')
    model = FasterRCNNTrainChain(faster_rcnn)
    chainer.cuda.get_device_from_id(device).use()
    model.to_gpu()

    train = EpicKitchensBboxDataset(year='2018', split='train')
    if comm.rank == 0:
        indices = np.arange(len(train))
    else:
        indices = None
    train = TransformDataset(train, ('img', 'bbox', 'label', 'scale'),
                             Transform(faster_rcnn))

    indices = chainermn.scatter_dataset(indices, comm, shuffle=True)
    train = train.slice[indices]

    train_iter = chainer.iterators.SerialIterator(train,
                                                  batch_size=args.batchsize)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(), comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(rate=0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=device)
    trainer = training.Trainer(updater, (18, 'epoch'), args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr),
                   trigger=triggers.ManualScheduleTrigger([12, 15], 'epoch'))

    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(
            extensions.LogReport(log_name='log.json', trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
            'main/roi_loc_loss', 'main/roi_cls_loss', 'main/rpn_loc_loss',
            'main/rpn_cls_loss'
        ]),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=1))

        trainer.extend(extensions.snapshot_object(
            model.faster_rcnn, 'model_iter_{.updater.iteration}.npz'),
                       trigger=(1, 'epoch'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
Example 12
def get_trainer(args):
    config = yaml.load(open(args.config), Loader=yaml.SafeLoader)

    # Set workspace size
    if 'max_workspace_size' in config:
        chainer.cuda.set_max_workspace_size(config['max_workspace_size'])

    # Prepare ChainerMN communicator
    if args.gpu:
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
        device = comm.intra_rank
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1

    # Show the setup information
    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))
        if args.gpu:
            print('Using GPUs - max workspace size:',
                  chainer.cuda.get_max_workspace_size())
        print('Using {} communicator'.format(args.communicator))

    # Output version info
    if comm.rank == 0:
        print('Chainer version: {}'.format(chainer.__version__))
        print('ChainerMN version: {}'.format(chainermn.__version__))
        print('cuda: {}, cudnn: {}'.format(chainer.cuda.available,
                                           chainer.cuda.cudnn_enabled))

    # Create result_dir
    if args.result_dir is not None:
        config['result_dir'] = args.result_dir
        model_fn = config['model']['module'].split('.')[-1]
        sys.path.insert(0, args.result_dir)
        config['model']['module'] = model_fn
    else:
        config['result_dir'] = create_result_dir_from_config_path(args.config)
    log_fn = save_config_get_log_fn(config['result_dir'], args.config)
    if comm.rank == 0:
        print('result_dir:', config['result_dir'])

    # Instantiate model
    model = get_model_from_config(config, comm)
    if args.gpu:
        chainer.cuda.get_device(device).use()
        model.to_gpu()
    if comm.rank == 0:
        print('model:', model.__class__.__name__)

    # Initialize optimizer
    optimizer = get_optimizer_from_config(model, config)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    if comm.rank == 0:
        print('optimizer:', optimizer.__class__.__name__)

    # Setting up datasets
    if comm.rank == 0:
        train_dataset, valid_dataset = get_dataset_from_config(config)
        print('train_dataset: {}'.format(len(train_dataset)),
              train_dataset.__class__.__name__)
        print('valid_dataset: {}'.format(len(valid_dataset)),
              valid_dataset.__class__.__name__)
    else:
        train_dataset, valid_dataset = [], []
    train_dataset = chainermn.scatter_dataset(train_dataset, comm)
    valid_dataset = chainermn.scatter_dataset(valid_dataset, comm)

    # Create iterators
    # multiprocessing.set_start_method('forkserver')
    train_iter, valid_iter = create_iterators(train_dataset, valid_dataset,
                                              config)
    if comm.rank == 0:
        print('train_iter:', train_iter.__class__.__name__)
        print('valid_iter:', valid_iter.__class__.__name__)

    # Create updater and trainer
    if 'updater_creator' in config:
        updater_creator = get_updater_creator_from_config(config)
        updater = updater_creator(train_iter, optimizer, device=device)
    else:
        updater = create_updater(train_iter, optimizer, device=device)
    if comm.rank == 0:
        print('updater:', updater.__class__.__name__)

    # Create Trainer
    trainer = training.Trainer(updater,
                               config['stop_trigger'],
                               out=config['result_dir'])
    if comm.rank == 0:
        print('Trainer stops:', config['stop_trigger'])

    # Trainer extensions
    for ext in config['trainer_extension']:
        ext, values = ext.popitem()
        if ext == 'LogReport' and comm.rank == 0:
            trigger = values['trigger']
            trainer.extend(
                extensions.LogReport(trigger=trigger, log_name=log_fn))
        elif ext == 'observe_lr' and comm.rank == 0:
            trainer.extend(extensions.observe_lr(), trigger=values['trigger'])
        elif ext == 'dump_graph' and comm.rank == 0:
            trainer.extend(extensions.dump_graph(**values))
        elif ext == 'Evaluator':
            assert 'module' in values
            mod = import_module(values['module'])
            evaluator = getattr(mod, values['name'])
            if evaluator is extensions.Evaluator:
                evaluator = evaluator(valid_iter, model, device=device)
            else:
                evaluator = evaluator(valid_iter, model.predictor)
            evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
            trainer.extend(evaluator,
                           trigger=values['trigger'],
                           name=values['prefix'])
        elif ext == 'PlotReport' and comm.rank == 0:
            trainer.extend(extensions.PlotReport(**values))
        elif ext == 'PrintReport' and comm.rank == 0:
            trigger = values.pop('trigger')
            trainer.extend(extensions.PrintReport(**values), trigger=trigger)
        elif ext == 'ProgressBar' and comm.rank == 0:
            upd_int = values['update_interval']
            trigger = values['trigger']
            trainer.extend(extensions.ProgressBar(update_interval=upd_int),
                           trigger=trigger)
        elif ext == 'snapshot' and comm.rank == 0:
            filename = values['filename']
            trigger = values['trigger']
            trainer.extend(extensions.snapshot(filename=filename),
                           trigger=trigger)

    # LR decay
    if 'lr_drop_ratio' in config['optimizer'] \
            and 'lr_drop_triggers' in config['optimizer']:
        ratio = config['optimizer']['lr_drop_ratio']
        points = config['optimizer']['lr_drop_triggers']['points']
        unit = config['optimizer']['lr_drop_triggers']['unit']
        drop_trigger = triggers.ManualScheduleTrigger(points, unit)

        def lr_drop(trainer):
            trainer.updater.get_optimizer('main').lr *= ratio

        trainer.extend(lr_drop, trigger=drop_trigger)

    if 'lr_drop_poly_power' in config['optimizer']:
        power = config['optimizer']['lr_drop_poly_power']
        stop_trigger = config['stop_trigger']
        batchsize = train_iter.batch_size
        len_dataset = len(train_dataset)
        trainer.extend(PolynomialShift('lr', power, stop_trigger, batchsize,
                                       len_dataset),
                       trigger=(1, 'iteration'))

    # Resume
    if args.resume is not None:
        # fn = '{}.bak'.format(args.resume)
        # shutil.copy(args.resume, fn)
        serializers.load_npz(args.resume, trainer)
        if comm.rank == 0:
            print('Resumed from:', args.resume)

    if comm.rank == 0:
        print('==========================================')

    return trainer
Example 13
def main():
    args = parse_args()
    dump_args(args)

    # prepare dataset
    train, val, val_raw = prepare_dataset(full_data=args.full_data)
    train_iter = chainer.iterators.MultiprocessIterator(train,
                                                        args.batchsize,
                                                        shared_mem=4000000)
    val_iter = chainer.iterators.MultiprocessIterator(val,
                                                      args.batchsize,
                                                      repeat=False,
                                                      shuffle=False,
                                                      shared_mem=4000000)
    eval_iter = chainer.iterators.MultiprocessIterator(val_raw,
                                                       4,
                                                       repeat=False,
                                                       shuffle=False,
                                                       shared_mem=4000000)

    # setup model
    if args.model == 'unet':
        model = UnetCenterNet()
    elif args.model == 'res18unet':
        model = Res18UnetCenterNet()

    training_model = TrainingModel(model)
    if args.gpu >= 0:
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        training_model.to_gpu()

    # setup optimizer
    optimizer = chainer.optimizers.NesterovAG(lr=1e-3)
    optimizer.setup(training_model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(1e-5))
    optimizer.add_hook(chainer.optimizer.GradientClipping(100.))

    # setup trainer
    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       device=args.gpu,
                                       converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # set trainer extensions
    if not args.full_data:
        trainer.extend(
            extensions.Evaluator(val_iter,
                                 training_model,
                                 device=args.gpu,
                                 converter=converter))
        trainer.extend(DetectionMapEvaluator(eval_iter, model))

    trainer.extend(extensions.snapshot_object(model,
                                              'model_{.updater.epoch}.npz'),
                   trigger=(10, 'epoch'))
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.LogReport())
    if args.full_data:
        trainer.extend(extensions.PrintReport(['epoch', 'main/loss']))
    else:
        trainer.extend(
            extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss', 'eval/main/map'
            ]))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # learning rate scheduling
    lr_drop_epochs = [int(args.epoch * 0.5), int(args.epoch * 0.75)]
    lr_drop_trigger = triggers.ManualScheduleTrigger(lr_drop_epochs, 'epoch')
    trainer.extend(LearningRateDrop(0.1), trigger=lr_drop_trigger)
    trainer.extend(extensions.observe_lr())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    # start training
    trainer.run()
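`LearningRateDrop` is a project-local extension whose definition is not shown here; a plausible minimal sketch, assuming it simply multiplies the 'main' optimizer's learning rate by a fixed factor whenever its trigger fires:

class LearningRateDrop(object):
    # Hypothetical reconstruction; the project's real extension may differ.

    def __init__(self, rate):
        self.rate = rate

    def __call__(self, trainer):
        optimizer = trainer.updater.get_optimizer('main')
        optimizer.lr *= self.rate

Combined with the ManualScheduleTrigger above, this drops NesterovAG's lr tenfold at 50% and 75% of the scheduled epochs.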
Example 14
def train(args):
    config = yaml.load(open(args.config), Loader=yaml.SafeLoader)

    print('==========================================')

    # Set workspace size
    if 'max_workspace_size' in config:
        chainer.cuda.set_max_workspace_size(config['max_workspace_size'])

    # Output version info
    print('chainer version: {}'.format(chainer.__version__))
    print('cuda: {}, cudnn: {}, nccl: {}'.format(chainer.cuda.available,
                                                 chainer.cuda.cudnn_enabled,
                                                 HAVE_NCCL))

    # Create result_dir
    if args.result_dir is not None:
        config['result_dir'] = args.result_dir
    else:
        config['result_dir'] = create_result_dir_from_config_path(args.config)
    log_fn = save_config_get_log_fn(config['result_dir'], args.config)
    print('result_dir:', config['result_dir'])

    # Instantiate model
    model = get_model_from_config(config)
    print('model:', model.__class__.__name__)

    # Initialize optimizer
    optimizer = get_optimizer_from_config(model, config)
    print('optimizer:', optimizer.__class__.__name__)

    # Setting up datasets
    train_dataset, valid_dataset = get_dataset_from_config(config)
    print('train_dataset: {}'.format(len(train_dataset)),
          train_dataset.__class__.__name__)
    print('valid_dataset: {}'.format(len(valid_dataset)),
          valid_dataset.__class__.__name__)

    # Prepare devices
    devices = {'main': args.gpus[0]}
    for gid in args.gpus[1:]:
        devices['gpu{}'.format(gid)] = gid

    # Create iterators
    train_iter, valid_iter = create_iterators(
        train_dataset, config['dataset']['train']['batchsize'], valid_dataset,
        config['dataset']['valid']['batchsize'], devices)
    print('train_iter:', train_iter.__class__.__name__)
    print('valid_iter:', valid_iter.__class__.__name__)

    # Create updater
    updater_creator = get_updater_creator_from_config(config)
    updater = updater_creator(train_iter, optimizer, devices)
    print('updater:', updater.__class__.__name__)

    # Create trainer
    trainer = training.Trainer(updater,
                               config['stop_trigger'],
                               out=config['result_dir'])
    print('Trainer stops:', config['stop_trigger'])

    # Trainer extensions
    for ext in config['trainer_extension']:
        ext, values = ext.popitem()
        if ext == 'LogReport':
            trigger = values['trigger']
            trainer.extend(
                extensions.LogReport(trigger=trigger, log_name=log_fn))
        elif ext == 'observe_lr':
            trainer.extend(extensions.observe_lr(), trigger=values['trigger'])
        elif ext == 'dump_graph':
            trainer.extend(extensions.dump_graph(**values))
        elif ext == 'Evaluator':
            evaluator_creator = get_evaluator_creator_from_config(values)
            evaluator = evaluator_creator(valid_iter, model, devices)
            trainer.extend(evaluator,
                           trigger=values['trigger'],
                           name=values['prefix'])
        elif ext == 'PlotReport':
            trainer.extend(extensions.PlotReport(**values))
        elif ext == 'PrintReport':
            trigger = values.pop('trigger')
            trainer.extend(extensions.PrintReport(**values), trigger=trigger)
        elif ext == 'ProgressBar':
            upd_int = values['update_interval']
            trigger = values['trigger']
            trainer.extend(extensions.ProgressBar(update_interval=upd_int),
                           trigger=trigger)
        elif ext == 'snapshot':
            filename = values['filename']
            trigger = values['trigger']
            trainer.extend(extensions.snapshot(filename=filename),
                           trigger=trigger)
        elif ext == 'ParameterStatistics':
            links = []
            for link_name in values.pop('links'):
                lns = [ln.strip() for ln in link_name.split('.') if ln.strip()]
                target = model.predictor
                for ln in lns:
                    target = getattr(target, ln)
                links.append(target)
            trainer.extend(extensions.ParameterStatistics(links, **values))
        elif ext == 'custom':
            custom_extension = get_custum_extension_from_config(values)
            trainer.extend(custom_extension, trigger=values['trigger'])

    # LR decay
    if 'lr_drop_ratio' in config['optimizer'] \
            and 'lr_drop_triggers' in config['optimizer']:
        ratio = config['optimizer']['lr_drop_ratio']
        points = config['optimizer']['lr_drop_triggers']['points']
        unit = config['optimizer']['lr_drop_triggers']['unit']
        drop_trigger = triggers.ManualScheduleTrigger(points, unit)

        def lr_drop(trainer):
            trainer.updater.get_optimizer('main').lr *= ratio

        trainer.extend(lr_drop, trigger=drop_trigger)

    # Resume
    if args.resume is not None:
        fn = '{}.bak'.format(args.resume)
        shutil.copy(args.resume, fn)
        serializers.load_npz(args.resume, trainer)
        print('Resumed from:', args.resume)

    print('==========================================')

    trainer.run()
    return 0
Example 15
def main():
  parser = argparse.ArgumentParser()
  parser.add_argument('--model',
                      choices=('ssd300', 'ssd512'),
                      default='ssd300')
  parser.add_argument('--batchsize', type=int, default=32)
  parser.add_argument('--test-batchsize', type=int, default=16)
  parser.add_argument('--iteration', type=int, default=120000)
  parser.add_argument('--step', type=int, nargs='*', default=[80000, 100000])
  parser.add_argument('--gpu', type=int, default=-1)
  parser.add_argument('--out', default='result')
  parser.add_argument('--resume')
  parser.add_argument('--dtype',
                      type=str,
                      choices=dtypes.keys(),
                      default='float32',
                      help='Select the data type of the model')
  parser.add_argument('--model-dir',
                      default=None,
                      type=str,
                      help='Where to store models')
  parser.add_argument('--dataset-dir',
                      default=None,
                      type=str,
                      help='Where to store datasets')
  parser.add_argument('--dynamic-interval',
                      default=None,
                      type=int,
                      help='Interval for dynamic loss scaling')
  parser.add_argument('--init-scale',
                      default=1,
                      type=float,
                      help='Initial scale for ada loss')
  parser.add_argument('--loss-scale-method',
                      default='approx_range',
                      type=str,
                      help='Method for adaptive loss scaling')
  parser.add_argument('--scale-upper-bound',
                      default=32800,
                      type=float,
                      help='Hard upper bound for each scale factor')
  parser.add_argument('--accum-upper-bound',
                      default=32800,
                      type=float,
                      help='Accumulated upper bound for all scale factors')
  parser.add_argument('--update-per-n-iteration',
                      default=100,
                      type=int,
                      help='Update the loss scale value per n iteration')
  parser.add_argument('--snapshot-per-n-iteration',
                      default=10000,
                      type=int,
                      help='The frequency of taking snapshots')
  parser.add_argument('--n-uf', default=1e-3, type=float)
  parser.add_argument('--nosanity-check', default=False, action='store_true')
  parser.add_argument('--nouse-fp32-update',
                      default=False, action='store_true')
  parser.add_argument('--profiling', default=False, action='store_true')
  parser.add_argument('--verbose',
                      action='store_true',
                      default=False,
                      help='Verbose output')
  args = parser.parse_args()

  # Setting data types
  if args.dtype != 'float32':
    chainer.global_config.use_cudnn = 'never'
  chainer.global_config.dtype = dtypes[args.dtype]
  print('==> Setting the data type to {}'.format(args.dtype))

  # Initialize model
  if args.model == 'ssd300':
    model = SSD300(n_fg_class=len(voc_bbox_label_names),
                   pretrained_model='imagenet')
  elif args.model == 'ssd512':
    model = SSD512(n_fg_class=len(voc_bbox_label_names),
                   pretrained_model='imagenet')

  model.use_preset('evaluate')

  # Apply adaptive loss scaling
  recorder = AdaLossRecorder(sample_per_n_iter=100)
  profiler = Profiler()
  sanity_checker = SanityChecker(check_per_n_iter=100) if not args.nosanity_check else None
  # Update the model to support AdaLoss
  # TODO: refactor
  model_ = AdaLossScaled(
      model,
      init_scale=args.init_scale,
      cfg={
          'loss_scale_method': args.loss_scale_method,
          'scale_upper_bound': args.scale_upper_bound,
          'accum_upper_bound': args.accum_upper_bound,
          'update_per_n_iteration': args.update_per_n_iteration,
          'recorder': recorder,
          'profiler': profiler,
          'sanity_checker': sanity_checker,
          'n_uf_threshold': args.n_uf,
      },
      transforms=[
          AdaLossTransformLinear(),
          AdaLossTransformConvolution2D(),
      ],
      verbose=args.verbose)

  # Finalize the model
  train_chain = MultiboxTrainChain(model_)
  if args.gpu >= 0:
    chainer.cuda.get_device_from_id(args.gpu).use()
    cp.random.seed(0)

    # NOTE: we have to transfer modules explicitly to GPU
    model.coder.to_gpu()
    model.extractor.to_gpu()
    model.multibox.to_gpu()

  # Prepare dataset
  if args.model_dir is not None:
    chainer.dataset.set_dataset_root(args.model_dir)
  train = TransformDataset(
      ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                          VOCBboxDataset(year='2012', split='trainval')),
      Transform(model.coder, model.insize, model.mean, dtype=dtypes[args.dtype]))
  # train_iter = chainer.iterators.MultiprocessIterator(
  #     train, args.batchsize) # , n_processes=8, n_prefetch=2)
  train_iter = chainer.iterators.MultithreadIterator(train, args.batchsize)
  # train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

  test = VOCBboxDataset(year='2007',
                        split='test',
                        use_difficult=True,
                        return_difficult=True)
  test_iter = chainer.iterators.SerialIterator(test,
                                               args.test_batchsize,
                                               repeat=False,
                                               shuffle=False)

  # initial lr is set to 1e-3 by ExponentialShift
  optimizer = chainer.optimizers.MomentumSGD()
  if args.dtype == 'mixed16':
    if not args.nouse_fp32_update:
      print('==> Using FP32 update for dtype=mixed16')
      optimizer.use_fp32_update()  # by default use fp32 update

    # HACK: support skipping update by existing loss scaling functionality
    if args.dynamic_interval is not None:
      optimizer.loss_scaling(interval=args.dynamic_interval, scale=None)
    else:
      optimizer.loss_scaling(interval=float('inf'), scale=None)
      optimizer._loss_scale_max = 1.0  # to prevent actual loss scaling

  optimizer.setup(train_chain)
  for param in train_chain.params():
    if param.name == 'b':
      param.update_rule.add_hook(GradientScaling(2))
    else:
      param.update_rule.add_hook(WeightDecay(0.0005))

  updater = training.updaters.StandardUpdater(train_iter,
                                              optimizer,
                                              device=args.gpu)
  trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                             args.out)
  trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                 trigger=triggers.ManualScheduleTrigger(
                     args.step, 'iteration'))

  trainer.extend(DetectionVOCEvaluator(test_iter,
                                       model,
                                       use_07_metric=True,
                                       label_names=voc_bbox_label_names),
                 trigger=triggers.ManualScheduleTrigger(
                     args.step + [args.iteration], 'iteration'))

  log_interval = 10, 'iteration'
  trainer.extend(extensions.LogReport(trigger=log_interval))
  trainer.extend(extensions.observe_lr(), trigger=log_interval)
  trainer.extend(extensions.observe_value(
      'loss_scale',
      lambda trainer: trainer.updater.get_optimizer('main')._loss_scale),
      trigger=log_interval)

  metrics = [
      'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
      'main/loss/conf', 'validation/main/map'
  ]
  if args.dynamic_interval is not None:
    metrics.insert(2, 'loss_scale')
  trainer.extend(extensions.PrintReport(metrics), trigger=log_interval)
  trainer.extend(extensions.ProgressBar(update_interval=10))

  trainer.extend(extensions.snapshot(),
                 trigger=triggers.ManualScheduleTrigger(
                     args.step + [args.iteration], 'iteration'))
  trainer.extend(extensions.snapshot_object(
      model, 'model_iter_{.updater.iteration}'),
      trigger=(args.iteration, 'iteration'))

  if args.resume:
    serializers.load_npz(args.resume, trainer)

  hook = AdaLossMonitor(sample_per_n_iter=100,
                        verbose=args.verbose,
                        includes=['Grad', 'Deconvolution'])
  recorder.trainer = trainer
  hook.trainer = trainer

  with ExitStack() as stack:
    stack.enter_context(hook)
    trainer.run()

  recorder.export().to_csv(os.path.join(args.out, 'loss_scale.csv'))
  profiler.export().to_csv(os.path.join(args.out, 'profile.csv'))
  if sanity_checker:
    sanity_checker.export().to_csv(os.path.join(args.out, 'sanity_check.csv'))
  hook.export_history().to_csv(os.path.join(args.out, 'grad_stats.csv'))
Example 16
def main():
    parser = argparse.ArgumentParser(
        description='Chainer Multi-label classification')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=0,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=4,
                        help='Number of images in each mini-batch')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    args = parser.parse_args()

    model = get_resnet_50(len(voc_bbox_label_names))
    model.pick = 'fc6'
    train_chain = MultiLabelClassifier(model,
                                       loss_scale=len(voc_bbox_label_names))

    train = VOCBboxDataset(year='2007', split='trainval', use_difficult=False)
    train = TransformDataset(train, ('img', 'bbox'), bbox_to_multi_label)
    test = VOCBboxDataset(year='2007', split='test', use_difficult=False)
    test = TransformDataset(test, ('img', 'bbox'), bbox_to_multi_label)

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        train_chain.to_gpu()  # Copy the model to the GPU

    optimizer = chainer.optimizers.MomentumSGD(0.001)
    optimizer.setup(train_chain)

    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(1e-4))

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    stop_trigger = (11, 'epoch')
    log_interval = (20, 'iteration')

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.gpu,
                                                converter=converter)
    trainer = training.Trainer(updater, stop_trigger, out=args.out)
    trainer.extend(
        extensions.Evaluator(test_iter,
                             train_chain,
                             device=args.gpu,
                             converter=converter))
    trainer.extend(extensions.ExponentialShift('lr', 0.1),
                   trigger=triggers.ManualScheduleTrigger([8, 10], 'epoch'))

    trainer.extend(chainer.training.extensions.observe_lr(),
                   trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'lr',
        'epoch',
        'elapsed_time',
        'main/loss',
        'main/recall',
        'main/precision',
        'main/n_pred',
        'main/n_pos',
        'validation/main/loss',
        'validation/main/recall',
        'validation/main/precision',
        'validation/main/n_pred',
        'validation/main/n_pos',
    ]),
                   trigger=log_interval)

    trainer.extend(extensions.snapshot_object(model, 'snapshot_model.npz'))
    trainer.extend(extensions.LogReport(trigger=log_interval))

    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Example 17
def main():
    # Parse the arguments.
    args = parse_arguments()
    augment = args.augment != 'False'
    multi_gpu = args.multi_gpu != 'False'
    if args.label:
        labels = args.label
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing is required for the regression task.
    def postprocess_label(label_list):
        label_arr = np.asarray(label_list, dtype=np.int32)
        return label_arr

    # Apply a preprocessor to the dataset.
    logging.info('Preprocess train dataset and test dataset...')
    preprocessor = preprocess_method_dict[args.method]()
    parser = CSVFileParserForPair(preprocessor, postprocess_label=postprocess_label,
                                  labels=labels, smiles_cols=['smiles_1', 'smiles_2'])
    train = parser.parse(args.train_datafile)['dataset']
    valid = parser.parse(args.valid_datafile)['dataset']

    if augment:
        logging.info('Utilizing data augmentation in train set')
        train = augment_dataset(train)

    num_train = train.get_datasets()[0].shape[0]
    num_valid = valid.get_datasets()[0].shape[0]
    logging.info('Train/test split: {}/{}'.format(num_train, num_valid))

    if len(args.net_hidden_dims):
        net_hidden_dims = tuple(
            int(dim) for dim in args.net_hidden_dims.split(','))
    else:
        net_hidden_dims = ()
    fp_attention = bool(args.fp_attention)
    update_attention = bool(args.update_attention)
    weight_tying = args.weight_tying != 'False'
    attention_tying = args.attention_tying != 'False'
    fp_batch_normalization = args.fp_bn == 'True'
    layer_aggregator = args.layer_aggregator or None
    context = args.context != 'False'
    output_activation = functions.relu if args.output_activation == 'relu' else None
    predictor = set_up_predictor(method=args.method,
                                 fp_hidden_dim=args.fp_hidden_dim, fp_out_dim=args.fp_out_dim, conv_layers=args.conv_layers,
                                 concat_hidden=args.concat_hidden, layer_aggregator=layer_aggregator,
                                 fp_dropout_rate=args.fp_dropout_rate, fp_batch_normalization=fp_batch_normalization,
                                 net_hidden_dims=net_hidden_dims, class_num=class_num,
                                 sim_method=args.sim_method, fp_attention=fp_attention, weight_typing=weight_tying, attention_tying=attention_tying,
                                 update_attention=update_attention,
                                 context=context, context_layers=args.context_layers, context_dropout=args.context_dropout,
                                 message_function=args.message_function, readout_function=args.readout_function,
                                 num_timesteps=args.num_timesteps, num_output_hidden_layers=args.num_output_hidden_layers,
                                 output_hidden_dim=args.output_hidden_dim, output_activation=output_activation,
                                 symmetric=args.symmetric
                                 )

    train_iter = SerialIterator(train, args.batchsize)
    test_iter = SerialIterator(valid, args.batchsize,
                               repeat=False, shuffle=False)

    metrics_fun = {'accuracy': F.binary_accuracy}
    classifier = Classifier(predictor, lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=metrics_fun, device=args.gpu)

    # Set up the optimizer.
    optimizer = optimizers.Adam(alpha=args.learning_rate, weight_decay_rate=args.weight_decay_rate)
    # optimizer = optimizers.Adam()
    # optimizer = optimizers.SGD(lr=args.learning_rate)
    optimizer.setup(classifier)
    # add regularization
    if args.max_norm > 0:
        optimizer.add_hook(chainer.optimizer.GradientClipping(threshold=args.max_norm))
    if args.l2_rate > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args.l2_rate))
    if args.l1_rate > 0:
        optimizer.add_hook(chainer.optimizer.Lasso(rate=args.l1_rate))

    # Set up the updater.
    if multi_gpu:
        logging.info('Using multiple GPUs')
        updater = training.ParallelUpdater(train_iter, optimizer, devices={'main': 0, 'second': 1},
                                           converter=concat_mols)
    else:
        logging.info('Using single GPU')
        updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu,
                                           converter=concat_mols)

    # Set up the trainer.
    logging.info('Training...')
    # add stop_trigger parameter
    early_stop = triggers.EarlyStoppingTrigger(monitor='validation/main/loss', patients=30, max_trigger=(500, 'epoch'))
    out = os.path.join('output', args.out)
    trainer = training.Trainer(updater, stop_trigger=early_stop, out=out)

    # trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(E.Evaluator(test_iter, classifier,
                               device=args.gpu, converter=concat_mols))

    train_eval_iter = SerialIterator(train, args.batchsize,
                                     repeat=False, shuffle=False)

    trainer.extend(AccuracyEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_acc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # so the name 'val_acc' is used instead.
    trainer.extend(AccuracyEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_acc',
        pos_labels=1, ignore_labels=-1))

    trainer.extend(ROCAUCEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_roc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # so the name 'val_roc' is used instead.
    trainer.extend(ROCAUCEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_roc',
        pos_labels=1, ignore_labels=-1))

    trainer.extend(PRCAUCEvaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_prc',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # so the name 'val_prc' is used instead.
    trainer.extend(PRCAUCEvaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_prc',
        pos_labels=1, ignore_labels=-1))

    trainer.extend(F1Evaluator(
        train_eval_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='train_f',
        pos_labels=1, ignore_labels=-1, raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # so the name 'val_f' is used instead.
    trainer.extend(F1Evaluator(
        test_iter, classifier, eval_func=predictor,
        device=args.gpu, converter=concat_mols, name='val_f',
        pos_labels=1, ignore_labels=-1))

    # Decay the learning rate at the milestones chosen by exp_shift_strategy.
    # trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate), trigger=(10, 'epoch'))
    if args.exp_shift_strategy == 1:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger([10, 20, 30, 40, 50, 60], 'epoch'))
    elif args.exp_shift_strategy == 2:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger([5, 10, 15, 20, 25, 30], 'epoch'))
    elif args.exp_shift_strategy == 3:
        trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate),
                       trigger=triggers.ManualScheduleTrigger([5, 10, 15, 20, 25, 30, 40, 50, 60, 70], 'epoch'))
    else:
        raise ValueError('No such strategy to adapt learning rate')
    # # observation of learning rate
    trainer.extend(E.observe_lr(), trigger=(1, 'iteration'))

    entries = [
        'epoch',
        'main/loss', 'train_acc/main/accuracy', 'train_roc/main/roc_auc', 'train_prc/main/prc_auc',
        # 'train_p/main/precision', 'train_r/main/recall',
        'train_f/main/f1',
        'validation/main/loss', 'val_acc/main/accuracy', 'val_roc/main/roc_auc', 'val_prc/main/prc_auc',
        # 'val_p/main/precision', 'val_r/main/recall',
        'val_f/main/f1',
        'lr',
        'elapsed_time']
    trainer.extend(E.PrintReport(entries=entries))
    # change from 10 to 2 on Mar. 1 2019
    trainer.extend(E.snapshot(), trigger=(2, 'epoch'))
    trainer.extend(E.LogReport())
    trainer.extend(E.ProgressBar())
    trainer.extend(E.PlotReport(['main/loss', 'validation/main/loss'], 'epoch', file_name='loss.png'))
    trainer.extend(E.PlotReport(['train_acc/main/accuracy', 'val_acc/main/accuracy'], 'epoch', file_name='accuracy.png'))

    if args.resume:
        resume_path = os.path.join(out, args.resume)
        logging.info('Resume training according to snapshot in {}'.format(resume_path))
        chainer.serializers.load_npz(resume_path, trainer)

    trainer.run()

    # Save the classifier's parameters.
    model_path = os.path.join(out, args.model_filename)
    logging.info('Saving the trained models to {}...'.format(model_path))
    classifier.save_pickle(model_path, protocol=args.protocol)
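
The three `exp_shift_strategy` branches above differ only in where the decay milestones fall. As a rough illustration of what `ExponentialShift('alpha', rate)` driven by a `ManualScheduleTrigger` produces (a plain-Python sketch, independent of Chainer; `alpha0` and `rate` stand in for `args.learning_rate` and `args.exp_shift_rate`):

def alpha_schedule(alpha0, rate, milestones, n_epochs):
    # Multiply alpha by `rate` once at each milestone epoch.
    alphas, alpha = [], alpha0
    for epoch in range(1, n_epochs + 1):
        if epoch in milestones:
            alpha *= rate
        alphas.append(alpha)
    return alphas

# Strategy 1 corresponds to alpha_schedule(alpha0, rate, {10, 20, 30, 40, 50, 60}, 70).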
Exemplo n.º 18
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('multi_task_300', 'multi_task_512'),
                        default='multi_task_300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--eval_step',
                        type=int,
                        nargs='*',
                        default=[80000, 100000, 120000])
    parser.add_argument('--lr_step',
                        type=int,
                        nargs='*',
                        default=[80000, 100000])
    parser.add_argument('--lr', type=float, default=0.001)
    parser.add_argument('--snap_step', type=int, default=10000)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out',
                        default='result')  # base output directory for experiment results
    parser.add_argument('--resume', type=str)
    parser.add_argument('--detection', action='store_true', default=False)
    parser.add_argument('--segmentation', action='store_true', default=False)
    parser.add_argument('--attention', action='store_true', default=False)
    parser.add_argument('--dataset', default='voc', type=str)
    parser.add_argument('--experiment', type=str, default='final_voc')
    parser.add_argument('--multitask_loss', action='store_true', default=False)
    parser.add_argument('--dynamic_loss', action='store_true', default=False)
    parser.add_argument('--log_interval', type=int, default=10)
    parser.add_argument('--debug', action='store_true', default=False)
    parser.add_argument('--update_split_interval', type=int, default=100)
    parser.add_argument(
        '--loss_split', type=float, default=0.5
    )  # fraction of the loss assigned to detection; segmentation gets 1 - loss_split
    args = parser.parse_args()
    snap_step = args.snap_step
    args.snap_step = []
    for step in range(snap_step, args.iteration + 1, snap_step):
        args.snap_step.append(step)
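    # Equivalent one-liner:
    # args.snap_step = list(range(snap_step, args.iteration + 1, snap_step))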

    # redefine the output path
    import os
    import time
    args.out = os.path.join(args.out, args.experiment,
                            time.strftime("%Y%m%d_%H%M%S", time.localtime()))

    if args.model == 'multi_task_300':
        model = Multi_task_300(n_fg_class=len(voc_bbox_label_names),
                               pretrained_model='imagenet',
                               detection=args.detection,
                               segmentation=args.segmentation,
                               attention=args.attention)
    elif args.model == 'multi_task_512':
        model = Multi_task_512(n_fg_class=len(voc_bbox_label_names),
                               pretrained_model='imagenet',
                               detection=args.detection,
                               segmentation=args.segmentation,
                               attention=args.attention)

    model.use_preset('evaluate')
    if not (args.segmentation or args.detection):
        raise RuntimeError(
            'At least one of --detection or --segmentation must be enabled.')

    train_chain = MultiboxTrainChain(model,
                                     gpu=args.gpu >= 0,
                                     use_multi_task_loss=args.multitask_loss,
                                     loss_split=args.loss_split)
    train_chain.cleargrads()

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        Multi_task_VOC(voc_experiments[args.experiment][args.experiment +
                                                        '_train']),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(
        train, batch_size=args.batchsize)

    test = VOCBboxDataset(year='2007',
                          split='test',
                          use_difficult=True,
                          return_difficult=True)

    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    test_mask = VOCSemanticSegmentationDataset(split='val')
    test_mask_iter = chainer.iterators.SerialIterator(test_mask,
                                                      args.batchsize,
                                                      repeat=False,
                                                      shuffle=False)

    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    # optimizer.add_hook(GradientClipping(0.1))
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))
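    # This per-parameter setup follows the original SSD recipe: biases ('b')
    # have their gradients scaled by 2 (equivalent to doubling their learning
    # rate under SGD) and skip weight decay; all other parameters are
    # regularized with WeightDecay(0.0005).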

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    '''if args.resume:
        serializers.load_npz(args.resume, trainer)'''
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=args.lr),
                   trigger=triggers.ManualScheduleTrigger(
                       args.lr_step, 'iteration'))

    if args.dataset == 'voc':
        use_07 = True
        label_names = voc_bbox_label_names
    elif args.dataset == 'coco':
        use_07 = False  # the VOC2007 11-point AP metric does not apply to COCO
        label_names = coco_bbox_label_names
    if args.detection and not args.debug:
        trainer.extend(MultitaskEvaluator(test_iter,
                                          model,
                                          args.dataset,
                                          use_07,
                                          label_names=label_names),
                       trigger=triggers.ManualScheduleTrigger(
                           args.eval_step + [args.iteration], 'iteration'))

    if args.segmentation and not args.debug:
        trainer.extend(MultitaskEvaluator(test_mask_iter,
                                          model,
                                          dataset=args.dataset,
                                          label_names=label_names,
                                          detection=False),
                       trigger=triggers.ManualScheduleTrigger(
                           args.eval_step + [args.iteration], 'iteration'))

    log_interval = args.log_interval, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    if args.segmentation and args.detection and args.dynamic_loss:
        trainer.extend(
            loss_split.LossSplit(trigger=(args.update_split_interval,
                                          'iteration')))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/mask',
        'main/loss/loc', 'main/loss/conf', 'main/loss/split'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(),
                   trigger=triggers.ManualScheduleTrigger(
                       args.snap_step + [args.iteration], 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=triggers.ManualScheduleTrigger(
                       args.snap_step + [args.iteration], 'iteration'))
    if args.resume:
        if 'model' in args.resume:
            serializers.load_npz(args.resume, model)
        else:
            serializers.load_npz(args.resume, trainer)

    print(args)

    trainer.run()
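
Across these examples ManualScheduleTrigger plays one role: it fires exactly once when training first reaches each listed point. A conceptual sketch of the iteration-unit case (not Chainer's actual implementation):

def fires(points, iteration):
    # True exactly when the updater's iteration count hits a scheduled point.
    return iteration in set(points)

assert fires([80000, 100000], 80000)
assert not fires([80000, 100000], 80001)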
Exemplo n.º 19
def do():
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--model', choices=('ssd300', 'ssd512'), default='ssd300')
    parser.add_argument('--batchsize', type=int, default=8)
    parser.add_argument('--iteration', type=int, default=64)
    parser.add_argument('--step', type=int, nargs='*', default=[8, 16])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    model = SSD300(
        n_fg_class=len(ssdd.labels),
        pretrained_model='imagenet'
    )
    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    """
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()
    """
    train = TransformDataset(
        train_dataset,
        Transform(model.coder, model.insize, model.mean),
    )
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test = test_dataset
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))
    
    updater = training.updaters.StandardUpdater(
        train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'), args.out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=1e-3),
        trigger=triggers.ManualScheduleTrigger(args.step, 'iteration')
    )
    """
    trainer.extend(
        extensions.Evaluator(
            test_iter, model
        ),
        trigger=triggers.ManualScheduleTrigger(
            args.step + [args.iteration], 'iteration'
        )
    )
    """
    trainer.extend(extensions.ProgressBar(update_interval=1))
    #trainer.extend(extensions.LogReport(trigger=1))
    #trainer.extend(extensions.observe_lr(), trigger=1)
    #trainer.extend(extensions.PrintReport(
    #    ['epoch', 'iteration', 'lr',
    #    'main/loss', 'main/loss/loc', 'main/loss/conf',
    #    'validation/main/map', 'elapsed_time']),
    #    trigger=1)
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(
                ['main/loss', 'main/loss/loc', 'main/loss/conf'],
                'epoch', file_name='loss.png'))
        trainer.extend(
            extensions.PlotReport(
                ['validation/main/map'],
                'epoch', file_name='accuracy.png'))
    trainer.extend(extensions.snapshot(
        filename='snapshot_iter_{.updater.iteration}.npz'),
        trigger=(4, 'iteration')
    )

    trainer.run()
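
For reference, `ExponentialShift('lr', 0.1, init=1e-3)` combined with the `--step` schedule above yields a plain step decay. A minimal sketch of the resulting value, assuming the default steps `[8, 16]`:

def stepped_lr(iteration, init=1e-3, rate=0.1, steps=(8, 16)):
    # Multiply `init` by `rate` once for every scheduled step already reached,
    # mirroring ExponentialShift fired by a ManualScheduleTrigger.
    return init * rate ** sum(iteration >= s for s in steps)

print([stepped_lr(i) for i in (0, 8, 16)])  # ~[1e-3, 1e-4, 1e-5]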
Exemplo n.º 20
    dataset = TrainDataset(
        [VOCDataset(args.root, *t.split('-')) for t in args.train], model)

    iterator = chainer.iterators.MultiprocessIterator(
        dataset, args.batchsize, n_processes=2)

    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(TrainWrapper(model))
    optimizer.add_hook(CustomWeightDecay(0.0005, b={'lr': 2, 'decay': 0}))

    updater = training.StandardUpdater(iterator, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.output)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=0.001),
        trigger=triggers.ManualScheduleTrigger([80000, 100000], 'iteration'))

    snapshot_interval = 1000, 'iteration'
    log_interval = 10, 'iteration'

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss', 'main/loc', 'main/conf', 'lr']),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
Exemplo n.º 21
    iterator = chainer.iterators.MultiprocessIterator(
        dataset, args.batchsize, n_processes=2)

    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(TrainWrapper(model))
    optimizer.add_hook(CustomWeightDecay(0.0005, b={'lr': 2, 'decay': 0}))

    updater = training.StandardUpdater(iterator, optimizer, device=args.gpu)
    trainer = training.Trainer(
        updater,
        (120000 * 32 // args.batchsize, 'iteration'),
        args.output)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=0.001),
        trigger=triggers.ManualScheduleTrigger(
            [80000 * 32 // args.batchsize, 100000 * 32 // args.batchsize],
            'iteration'))

    snapshot_interval = 1000, 'iteration'
    log_interval = 10, 'iteration'

    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.snapshot(), trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=snapshot_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'main/loss', 'main/loc', 'main/conf', 'lr']),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))
Exemplo n.º 22

class save_model(object):
    # NOTE: the source snippet begins mid-class; the header and __init__
    # signature below are a plausible reconstruction inferred from the call
    # site `save_model(model, SAVE_PATH, save_after=0)` further down.
    def __init__(self, model, save_name, saved_dir='', before_iter=0,
                 save_after=0):
        self.model = model
        self.save_name = save_name
        self.saved_dir = saved_dir
        self.before_iter = before_iter
        self.save_after = save_after

    def __call__(self, trainer):
        curr_iter = trainer.updater.iteration + self.before_iter
        if curr_iter > self.save_after:
            chainer.serializers.save_npz(
                self.saved_dir + self.save_name[:-4] + '_' + str(curr_iter) +
                '.npz',
                self.model,  # the source referenced a module-level `model`
            )


steps = [200000, 400000]
lr_trigger = triggers.ManualScheduleTrigger(steps, 'iteration')

updater = training.updaters.StandardUpdater(train_iter,
                                            optimizer,
                                            device=gpu_id)
trainer = training.Trainer(updater, (iters, 'iteration'), 'ssd_result')
trainer.extend(extensions.ExponentialShift('lr', 0.1), trigger=lr_trigger)
#trainer.extend(evaluator,trigger=(50000, 'iteration'))
trainer.extend(
    training.extensions.LogReport(log_name='ssd_report' + SAVE_PATH,
                                  trigger=(1000, 'iteration')))
trainer.extend(extensions.observe_lr(), trigger=(1000, 'iteration'))
trainer.extend(
    training.extensions.PrintReport(
        ['iteration', 'lr', 'main/loss', 'main/loss/loc', 'main/loss/conf']))
trainer.extend(save_model(model, SAVE_PATH, save_after=0),
               trigger=(10000, 'iteration'))  # trigger truncated in the source; this value is a guess
Exemplo n.º 23
def get_trainer(args):
    with open(args.config) as f:
        config = yaml.safe_load(f)

    # Set workspace size
    if 'max_workspace_size' in config:
        chainer.cuda.set_max_workspace_size(config['max_workspace_size'])

    # Show the setup information
    print('==========================================')
    print('Chainer version: {}'.format(chainer.__version__))
    print('CuPy version: {}'.format(chainer.cuda.cupy.__version__))
    print('cuda: {}, cudnn: {}'.format(
        chainer.cuda.available,
        chainer.cuda.cudnn_enabled,
    ))

    # Prepare devices
    print('Devices:')
    devices = {'main': args.gpus[0]}
    print('\tmain:', args.gpus[0])
    for gid in args.gpus[1:]:
        devices['gpu{}'.format(gid)] = gid
        print('\tgpu{}'.format(gid), gid)

    # Create result_dir
    if args.result_dir is not None:
        config['result_dir'] = args.result_dir
        model_fn = config['model']['module'].split('.')[-1]
        sys.path.insert(0, args.result_dir)
        config['model']['module'] = model_fn
    else:
        config['result_dir'] = create_result_dir_from_config_path(args.config)
    log_fn = save_config_get_log_fn(config['result_dir'], args.config)
    print('result_dir:', config['result_dir'])

    # Instantiate model
    model = get_model_from_config(config)
    print('model:', model.__class__.__name__)

    # Initialize optimizer
    optimizer = get_optimizer_from_config(model, config)
    print('optimizer:', optimizer.__class__.__name__)

    # Setting up datasets
    train_dataset, valid_dataset = get_dataset_from_config(config)
    print('train_dataset: {}'.format(len(train_dataset)),
          train_dataset.__class__.__name__)
    print('valid_dataset: {}'.format(len(valid_dataset)),
          valid_dataset.__class__.__name__)

    # Create iterators
    train_iter, valid_iter = create_iterators(train_dataset, valid_dataset,
                                              config)
    print('train_iter:', train_iter.__class__.__name__)
    print('valid_iter:', valid_iter.__class__.__name__)

    # Create updater and trainer
    if 'updater_creator' in config:
        updater_creator = get_updater_creator_from_config(config)
        updater = updater_creator(train_iter, optimizer, devices)
    else:
        updater = create_updater(train_iter, optimizer, devices)
    print('updater:', updater.__class__.__name__)

    # Create Trainer
    trainer = training.Trainer(updater,
                               config['stop_trigger'],
                               out=config['result_dir'])
    print('Trainer stops:', config['stop_trigger'])

    # Trainer extensions
    for ext in config['trainer_extension']:
        ext, values = ext.popitem()
        if ext == 'LogReport':
            trigger = values['trigger']
            trainer.extend(
                extensions.LogReport(trigger=trigger, log_name=log_fn))
        elif ext == 'observe_lr':
            trainer.extend(extensions.observe_lr(), trigger=values['trigger'])
        elif ext == 'dump_graph':
            trainer.extend(extensions.dump_graph(**values))
        elif ext == 'Evaluator':
            assert 'module' in values
            mod = import_module(values['module'])
            evaluator = getattr(mod, values['name'])
            if evaluator is extensions.Evaluator:
                evaluator = evaluator(valid_iter, model, device=args.gpus[0])
            else:
                evaluator = evaluator(valid_iter, model.predictor)
            trainer.extend(evaluator,
                           trigger=values['trigger'],
                           name=values['prefix'])
        elif ext == 'PlotReport':
            trainer.extend(extensions.PlotReport(**values))
        elif ext == 'PrintReport':
            trigger = values.pop('trigger')
            trainer.extend(extensions.PrintReport(**values), trigger=trigger)
        elif ext == 'ProgressBar':
            upd_int = values['update_interval']
            trigger = values['trigger']
            trainer.extend(extensions.ProgressBar(update_interval=upd_int),
                           trigger=trigger)
        elif ext == 'snapshot':
            filename = values['filename']
            trigger = values['trigger']
            trainer.extend(extensions.snapshot(filename=filename),
                           trigger=trigger)
        elif ext == 'ParameterStatistics':
            links = []
            for link_name in values.pop('links'):
                lns = [ln.strip() for ln in link_name.split('.') if ln.strip()]
                target = model.predictor
                for ln in lns:
                    target = getattr(target, ln)
                links.append(target)
            trainer.extend(extensions.ParameterStatistics(links, **values))
        elif ext == 'custom':
            custom_extension = get_custum_extension_from_config(values)
            trainer.extend(custom_extension)

    # LR decay
    if 'lr_drop_ratio' in config['optimizer'] \
            and 'lr_drop_triggers' in config['optimizer']:
        ratio = config['optimizer']['lr_drop_ratio']
        points = config['optimizer']['lr_drop_triggers']['points']
        unit = config['optimizer']['lr_drop_triggers']['unit']
        drop_trigger = triggers.ManualScheduleTrigger(points, unit)

        def lr_drop(trainer):
            trainer.updater.get_optimizer('main').lr *= ratio

        trainer.extend(lr_drop, trigger=drop_trigger)
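        # Unlike extensions.ExponentialShift, lr_drop mutates optimizer.lr in
        # place; the ManualScheduleTrigger makes it fire once per listed point.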

    if 'lr_drop_poly_power' in config['optimizer']:
        power = config['optimizer']['lr_drop_poly_power']
        stop_trigger = config['stop_trigger']
        batchsize = train_iter.batch_size
        len_dataset = len(train_dataset)
        trainer.extend(PolynomialShift('lr', power, stop_trigger, batchsize,
                                       len_dataset),
                       trigger=(1, 'iteration'))

    # Resume
    if args.resume is not None:
        serializers.load_npz(args.resume, trainer)
        print('Resumed from:', args.resume)

    print('==========================================')

    return trainer
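
For orientation, a hypothetical `config` fragment (invented values, shaped the way the loop above expects) might look like:

config = {
    'stop_trigger': (120000, 'iteration'),
    'trainer_extension': [
        {'LogReport': {'trigger': (100, 'iteration')}},
        {'observe_lr': {'trigger': (100, 'iteration')}},
        {'snapshot': {'filename': 'snapshot_{.updater.iteration}',
                      'trigger': (1000, 'iteration')}},
    ],
    'optimizer': {
        'lr_drop_ratio': 0.1,
        'lr_drop_triggers': {'points': [80000, 100000],
                             'unit': 'iteration'},
    },
}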
Exemplo n.º 24
for param in train_chain.params():
    if param.name == 'b':
        param.update_rule.add_hook(GradientScaling(2))
    else:
        param.update_rule.add_hook(WeightDecay(0.0005))

updater = training.updaters.StandardUpdater(
    train_iter, optimizer, device=gpu_id)

trainer = training.Trainer(
    updater,
    (training_epoch, 'epoch'), out)

trainer.extend(
    extensions.ExponentialShift('lr', lr_decay_rate, init=initial_lr),
    trigger=triggers.ManualScheduleTrigger(lr_decay_timing, 'epoch'))

trainer.extend(
    DetectionVOCEvaluator(
        valid_iter, model, use_07_metric=False,
        label_names=bball_labels),
    trigger=(1, 'epoch'))
trainer.extend(extensions.LogReport(trigger=log_interval))
trainer.extend(extensions.observe_lr(), trigger=log_interval)
trainer.extend(extensions.PrintReport(
    ['epoch', 'iteration', 'lr',
     'main/loss', 'main/loss/loc', 'main/loss/conf',
     'validation/main/map', 'elapsed_time']),
    trigger=log_interval)
if extensions.PlotReport.available():
    trainer.extend(
        extensions.PlotReport(
            ['main/loss', 'validation/main/map'],
            'epoch', file_name='loss.png'))  # call truncated in the source; a plausible completion
Exemplo n.º 25
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--iteration', type=int, default=120000)
    parser.add_argument('--step', type=int, nargs='*', default=[80000, 100000])
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(VOCBboxDataset(year='2007', split='trainval'),
                            VOCBboxDataset(year='2012', split='trainval')),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test = VOCBboxDataset(year='2007',
                          split='test',
                          use_difficult=True,
                          return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step, 'iteration'))

    trainer.extend(DetectionVOCEvaluator(test_iter,
                                         model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))
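    # Scheduling the evaluator at args.step + [args.iteration] runs the slow
    # VOC evaluation only at each learning-rate drop and once at the very end.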

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(),
                   trigger=triggers.ManualScheduleTrigger(
                       args.step + [args.iteration], 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=(args.iteration, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
Exemplo n.º 26
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--labelnum', type=int, default=50)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')

    parser.add_argument('--image_label',
                        '-il',
                        help='Path to training image-label list file')
    parser.add_argument('--bbox', help='Path to training bbox list file')
    parser.add_argument('--image_label_test',
                        '-ilt',
                        help='Path to test image-label list file')
    parser.add_argument('--bbox_test', help='Path to test bbox list file')

    parser.add_argument('--image_root',
                        '-TR',
                        default='.',
                        help='Root directory path of image files')

    args = parser.parse_args()

    comm = chainermn.create_communicator('naive')
    if comm.mpi_comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=args.labelnum, pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=args.labelnum, pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    from test_datasets import DeepFashionBboxDataset

    if comm.rank == 0:
        train = DeepFashionBboxDataset(args.bbox, args.image_label,
                                       args.image_root)
        test = DeepFashionBboxDataset(args.bbox_test, args.image_label_test,
                                      args.image_root)

        train = TransformDataset(
            train, Transform(model.coder, model.insize, model.mean))
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)
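    # scatter_dataset splits the rank-0 dataset into roughly equal shards and
    # sends one shard to each worker; the non-zero ranks contribute None.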

    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)

    checkpoint_interval = (1000, 'iteration')

    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([80000, 100000],
                                                          'iteration'))

    evaluator = DetectionVOCEvaluator(test_iter,
                                      model,
                                      use_07_metric=True,
                                      label_names=voc_bbox_label_names)

    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=(10000, 'iteration'))

    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map'
        ]),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
                       trigger=(120000, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
Exemplo n.º 27
def handler(context):
    dataset_alias = context.datasets
    trainval_2007_dataset_id = dataset_alias['trainval2007']
    trainval_2012_dataset_id = dataset_alias['trainval2012']
    test_2007_dataset_id = dataset_alias['test2007']

    trainval_2007_dataset = list(
        load_dataset_from_api(trainval_2007_dataset_id))
    trainval_2012_dataset = list(
        load_dataset_from_api(trainval_2012_dataset_id))
    test_2007_dataset = list(load_dataset_from_api(test_2007_dataset_id))

    if network_model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')
    elif network_model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_bbox_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if USE_GPU >= 0:
        chainer.cuda.get_device_from_id(USE_GPU).use()
        model.to_gpu()

    trainval_2007 = DetectionDatasetFromAPI(trainval_2007_dataset)
    trainval_2012 = DetectionDatasetFromAPI(trainval_2012_dataset)
    test_2007 = DetectionDatasetFromAPI(test_2007_dataset,
                                        use_difficult=True,
                                        return_difficult=True)

    train = TransformDataset(ConcatenatedDataset(trainval_2007, trainval_2012),
                             Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train, BATCHSIZE)

    test_iter = chainer.iterators.SerialIterator(test_2007,
                                                 BATCHSIZE,
                                                 repeat=False,
                                                 shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=USE_GPU)
    trainer = training.Trainer(updater, (nb_iterations, 'iteration'),
                               out=ABEJA_TRAINING_RESULT_DIR)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([80000, 100000],
                                                          'iteration'))

    trainer.extend(DetectionVOCEvaluator(test_iter,
                                         model,
                                         use_07_metric=True,
                                         label_names=voc_bbox_label_names),
                   trigger=(10000, 'iteration'))

    log_interval = 100, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)

    print_entries = [
        'iteration', 'main/loss', 'main/loss/loc', 'main/loss/conf',
        'validation/main/map'
    ]
    report_entries = [
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]

    trainer.extend(Statistics(report_entries,
                              nb_iterations,
                              obs_key='iteration'),
                   trigger=log_interval)
    trainer.extend(Tensorboard(report_entries, out_dir=log_path))
    trainer.extend(extensions.PrintReport(print_entries), trigger=log_interval)

    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=(nb_iterations, 'iteration'))

    trainer.run()
Exemplo n.º 28
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--gpu', '-g', type=int, default=0)
    parser.add_argument('--model', '-m', type=str, default=None)
    parser.add_argument('--opt', type=str, default=None)
    parser.add_argument('--epoch', '-e', type=int, default=10)
    parser.add_argument('--lr', '-l', type=float, default=0.001)
    parser.add_argument('--batch', '-b', type=int, default=32)
    parser.add_argument('--noplot',
                        dest='plot',
                        action='store_false',
                        help='Disable PlotReport extension')
    args = parser.parse_args()

    print("Loading datas")
    max_value = 200
    train, validation = LoadData(max_value=max_value,
                                 N=20000,
                                 validation_split=True)

    # Set up a neural network to train.
    print("Building model")
    model = NaluLayer(2, 1)

    if args.gpu >= 0:
        # Make a specified GPU current
        chainer.backends.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    optimizer = optimizers.Adam(alpha=args.lr)
    optimizer.setup(model)

    train_iter = iterators.SerialIterator(train,
                                          batch_size=args.batch,
                                          shuffle=True)
    test_iter = iterators.SerialIterator(validation,
                                         batch_size=args.batch,
                                         repeat=False,
                                         shuffle=False)

    if args.model is not None:
        print("loading model from " + args.model)
        serializers.load_npz(args.model, model)

    if args.opt is not None:
        print("loading opt from " + args.opt)
        serializers.load_npz(args.opt, optimizer)

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out='results')

    trainer.extend(extensions.Evaluator(test_iter, model, device=args.gpu))
    trainer.extend(extensions.LogReport(trigger=(10, 'iteration')))

    # Snapshot
    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    #serializers.load_npz('./results/snapshot_iter_1407', trainer)

    # Decay learning rate
    points = [args.epoch * 0.75]
    trainer.extend(extensions.ExponentialShift('alpha', 0.1),
                   trigger=triggers.ManualScheduleTrigger(points, 'epoch'))
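    # With the default --epoch 10 this fires once, at epoch 7.5;
    # ManualScheduleTrigger accepts fractional epoch points.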

    # Save two plot images to the result dir
    if args.plot and extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(['main/loss', 'validation/main/loss'],
                                  'epoch',
                                  file_name='loss.png'))

    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss']),
                   trigger=(1, 'iteration'))
    trainer.extend(extensions.ProgressBar(update_interval=1))

    #Plot computation graph
    trainer.extend(extensions.dump_graph('main/loss'))

    # Train
    trainer.run()

    # Save results
    modelname = "./results/model"
    print("saving model to " + modelname)
    serializers.save_npz(modelname, model)

    optimizername = "./results/optimizer"
    print("saving optimizer to " + optimizername)
    serializers.save_npz(optimizername, optimizer)

    # Estimate model
    model = NaluLayer(2, 1, return_prediction=True)
    weight_dir = "./results/model"
    print("Loading model from " + weight_dir)
    serializers.load_npz(weight_dir, model)

    n_test = 10
    test = LoadData(N=n_test, validation_split=False)
    loss, y = model(test)
    y = cuda.to_cpu(y.data)
    #print(test[0], y[0])
    for i in range(n_test):
        print('-' * 10)
        print('Q:  ', round(test[i, 0] * max_value), 'x',
              round(test[i, 1] * max_value))
        print('A:  ', round(test[i, 2] * (max_value**2)))
        print('P:  ', round(y[i, 0] * (max_value**2)))
    print('-' * 10)
Exemplo n.º 29
def main():
    # Parse the arguments.
    args = parse_arguments()
    if args['label']:
        labels = args['label']
        class_num = len(labels) if isinstance(labels, list) else 1
    else:
        raise ValueError('No target label was specified.')

    # Dataset preparation. Postprocessing casts the labels for the classification task.
    def postprocess_label(label_list):
        label_arr = np.asarray(label_list, dtype=np.int32)
        return label_arr

    # Apply a preprocessor to the dataset.
    logging.info('Preprocess train dataset and valid dataset...')
    # use `ggnn` for the time being
    preprocessor = preprocess_method_dict['ggnn']()
    # parser = CSVFileParserForPair(preprocessor, postprocess_label=postprocess_label,
    #                               labels=labels, smiles_cols=['smiles_1', 'smiles_2'])
    if args['feature'] == 'molenc':
        parser = MolAutoencoderParserForPair(
            preprocessor,
            postprocess_label=postprocess_label,
            labels=labels,
            smiles_cols=['smiles_1', 'smiles_2'])
    elif args['feature'] == 'ssp':
        parser = SSPParserForPair(preprocessor,
                                  postprocess_label=postprocess_label,
                                  labels=labels,
                                  smiles_cols=['smiles_1', 'smiles_2'])
    else:
        parser = Mol2VecParserForPair(preprocessor,
                                      postprocess_label=postprocess_label,
                                      labels=labels,
                                      smiles_cols=['smiles_1', 'smiles_2'])
    train = parser.parse(args['train_datafile'])['dataset']
    valid = parser.parse(args['valid_datafile'])['dataset']

    if args['augment']:
        logging.info('Utilizing data augmentation in train set')
        train = augment_dataset(train)

    num_train = train.get_datasets()[0].shape[0]
    num_valid = valid.get_datasets()[0].shape[0]
    logging.info('Train/valid split: {}/{}'.format(num_train, num_valid))

    if len(args['net_hidden_dims']):
        net_hidden_dims = tuple([
            int(net_hidden_dim)
            for net_hidden_dim in args['net_hidden_dims'].split(',')
        ])
    else:
        net_hidden_dims = ()

    predictor = set_up_predictor(fp_out_dim=args['fp_out_dim'],
                                 net_hidden_dims=net_hidden_dims,
                                 class_num=class_num,
                                 sim_method=args['sim_method'],
                                 symmetric=args['symmetric'])

    train_iter = SerialIterator(train, args['batchsize'])
    test_iter = SerialIterator(valid,
                               args['batchsize'],
                               repeat=False,
                               shuffle=False)

    metrics_fun = {'accuracy': F.binary_accuracy}
    classifier = Classifier(predictor,
                            lossfun=F.sigmoid_cross_entropy,
                            metrics_fun=metrics_fun,
                            device=args['gpu'])

    # Set up the optimizer.
    optimizer = optimizers.Adam(alpha=args['learning_rate'],
                                weight_decay_rate=args['weight_decay_rate'])
    # optimizer = optimizers.Adam()
    # optimizer = optimizers.SGD(lr=args.learning_rate)
    optimizer.setup(classifier)
    # add regularization
    if args['max_norm'] > 0:
        optimizer.add_hook(
            chainer.optimizer.GradientClipping(threshold=args['max_norm']))
    if args['l2_rate'] > 0:
        optimizer.add_hook(chainer.optimizer.WeightDecay(rate=args['l2_rate']))
    if args['l1_rate'] > 0:
        optimizer.add_hook(chainer.optimizer.Lasso(rate=args['l1_rate']))

    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       device=args['gpu'],
                                       converter=concat_mols)

    # Set up the trainer.
    logging.info('Training...')
    # add stop_trigger parameter
    early_stop = triggers.EarlyStoppingTrigger(monitor='validation/main/loss',
                                               patients=10,
                                               max_trigger=(500, 'epoch'))
    out = os.path.join('output', args['out'])
    trainer = training.Trainer(updater, stop_trigger=early_stop, out=out)

    trainer.extend(
        E.Evaluator(test_iter,
                    classifier,
                    device=args['gpu'],
                    converter=concat_mols))

    train_eval_iter = SerialIterator(train,
                                     args['batchsize'],
                                     repeat=False,
                                     shuffle=False)

    trainer.extend(
        AccuracyEvaluator(train_eval_iter,
                          classifier,
                          eval_func=predictor,
                          device=args['gpu'],
                          converter=concat_mols,
                          name='train_acc',
                          pos_labels=1,
                          ignore_labels=-1,
                          raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # so the name 'val_acc' is used instead.
    trainer.extend(
        AccuracyEvaluator(test_iter,
                          classifier,
                          eval_func=predictor,
                          device=args['gpu'],
                          converter=concat_mols,
                          name='val_acc',
                          pos_labels=1,
                          ignore_labels=-1))

    trainer.extend(
        ROCAUCEvaluator(train_eval_iter,
                        classifier,
                        eval_func=predictor,
                        device=args['gpu'],
                        converter=concat_mols,
                        name='train_roc',
                        pos_labels=1,
                        ignore_labels=-1,
                        raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # so the name 'val_roc' is used instead.
    trainer.extend(
        ROCAUCEvaluator(test_iter,
                        classifier,
                        eval_func=predictor,
                        device=args['gpu'],
                        converter=concat_mols,
                        name='val_roc',
                        pos_labels=1,
                        ignore_labels=-1))

    trainer.extend(
        PRCAUCEvaluator(train_eval_iter,
                        classifier,
                        eval_func=predictor,
                        device=args['gpu'],
                        converter=concat_mols,
                        name='train_prc',
                        pos_labels=1,
                        ignore_labels=-1,
                        raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # so the name 'val_prc' is used instead.
    trainer.extend(
        PRCAUCEvaluator(test_iter,
                        classifier,
                        eval_func=predictor,
                        device=args['gpu'],
                        converter=concat_mols,
                        name='val_prc',
                        pos_labels=1,
                        ignore_labels=-1))

    trainer.extend(
        F1Evaluator(train_eval_iter,
                    classifier,
                    eval_func=predictor,
                    device=args['gpu'],
                    converter=concat_mols,
                    name='train_f',
                    pos_labels=1,
                    ignore_labels=-1,
                    raise_value_error=False))
    # extension name='validation' is already used by `Evaluator`,
    # so the name 'val_f' is used instead.
    trainer.extend(
        F1Evaluator(test_iter,
                    classifier,
                    eval_func=predictor,
                    device=args['gpu'],
                    converter=concat_mols,
                    name='val_f',
                    pos_labels=1,
                    ignore_labels=-1))

    # Decay the learning rate at the milestones chosen by exp_shift_strategy.
    # trainer.extend(E.ExponentialShift('alpha', args.exp_shift_rate), trigger=(10, 'epoch'))
    if args['exp_shift_strategy'] == 1:
        trainer.extend(E.ExponentialShift('alpha', args['exp_shift_rate']),
                       trigger=triggers.ManualScheduleTrigger(
                           [10, 20, 30, 40, 50, 60], 'epoch'))
    elif args['exp_shift_strategy'] == 2:
        trainer.extend(E.ExponentialShift('alpha', args['exp_shift_rate']),
                       trigger=triggers.ManualScheduleTrigger(
                           [5, 10, 15, 20, 25, 30], 'epoch'))
    elif args['exp_shift_strategy'] == 3:
        trainer.extend(E.ExponentialShift('alpha', args['exp_shift_rate']),
                       trigger=triggers.ManualScheduleTrigger(
                           [5, 10, 15, 20, 25, 30, 40, 50, 60, 70], 'epoch'))
    else:
        raise ValueError('No such strategy to adapt learning rate')
    # # observation of learning rate
    trainer.extend(E.observe_lr(), trigger=(1, 'iteration'))

    entries = [
        'epoch',
        'main/loss',
        'train_acc/main/accuracy',
        'train_roc/main/roc_auc',
        'train_prc/main/prc_auc',
        # 'train_p/main/precision', 'train_r/main/recall',
        'train_f/main/f1',
        'validation/main/loss',
        'val_acc/main/accuracy',
        'val_roc/main/roc_auc',
        'val_prc/main/prc_auc',
        # 'val_p/main/precision', 'val_r/main/recall',
        'val_f/main/f1',
        'lr',
        'elapsed_time'
    ]
    trainer.extend(E.PrintReport(entries=entries))
    # change from 10 to 2 on Mar. 1 2019
    trainer.extend(E.snapshot(), trigger=(2, 'epoch'))
    trainer.extend(E.LogReport())
    trainer.extend(E.ProgressBar())
    trainer.extend(
        E.PlotReport(['main/loss', 'validation/main/loss'],
                     'epoch',
                     file_name='loss.png'))
    trainer.extend(
        E.PlotReport(['train_acc/main/accuracy', 'val_acc/main/accuracy'],
                     'epoch',
                     file_name='accuracy.png'))

    if args['resume']:
        resume_path = os.path.join(out, args['resume'])
        logging.info(
            'Resume training according to snapshot in {}'.format(resume_path))
        chainer.serializers.load_npz(resume_path, trainer)

    trainer.run()

    # Save the classifier's parameters.
    model_path = os.path.join(out, args['model_filename'])
    logging.info('Saving the trained models to {}...'.format(model_path))
    classifier.save_pickle(model_path, protocol=args['protocol'])
Exemplo n.º 30
def create_extension(trainer, test_iter, model, config, devices=None):
    """Create extension for training models"""
    for key, ext in config.items():
        if key == "Evaluator":
            cl = get_class(ext['module'])
            Evaluator = getattr(cl, ext['name'])
            trigger = parse_trigger(ext['trigger'])
            args = parse_dict(ext, 'args', {})
            if parse_dict(args, 'label_names', 'voc') == 'voc':
                args['label_names'] = voc_bbox_label_names
            trainer.extend(Evaluator(test_iter, model, **args),
                           trigger=trigger)
        elif key == "dump_graph":
            cl = getattr(extensions, key)
            trainer.extend(cl(ext['name']))
        elif key == 'snapshot':
            cl = getattr(extensions, key)
            trigger = parse_trigger(ext['trigger'])
            trainer.extend(cl(), trigger=trigger)
        elif key == 'snapshot_object':
            cl = getattr(extensions, key)
            trigger = parse_trigger(ext['trigger'])
            args = parse_dict(ext, 'args', {})
            if args.get('method') == 'best':
                trigger = triggers.MaxValueTrigger(args['name'], trigger)
            trainer.extend(cl(model, 'yolov2_{.updater.iteration}'),
                           trigger=trigger)
        elif key == 'LogReport':
            cl = getattr(extensions, key)
            trigger = parse_trigger(ext['trigger'])
            trainer.extend(cl(trigger=trigger))
        elif key == "PrintReport":
            cl = getattr(extensions, key)
            report_list = ext['name'].split(' ')
            trigger = parse_trigger(ext['trigger'])
            trainer.extend(cl(report_list), trigger=trigger)
        elif key == "ProgressBar":
            cl = getattr(extensions, key)
            trainer.extend(cl(update_interval=ext['update_interval']))
        elif key == 'observe_lr':
            cl = getattr(extensions, key)
            trigger = parse_trigger(ext['trigger'])
            trainer.extend(cl(), trigger=trigger)
        elif key == "PolynomialShift":
            cl = getattr(lr_utils, key)
            trigger = parse_trigger(ext['trigger'])
            len_dataset = len(trainer.updater.get_iterator('main').dataset)
            batchsize = trainer.updater.get_iterator('main').batch_size
            args = parse_dict(ext, 'args', {})
            args.update({
                'len_dataset': len_dataset,
                'batchsize': batchsize,
                'stop_trigger': trainer.stop_trigger
            })
            trainer.extend(cl(**args))
        elif key == "DarknetLRScheduler":
            cl = getattr(lr_utils, key)
            args = parse_dict(ext, 'args', {})
            args['step_trigger'] = [int(num) for num in args['step_trigger']]
            trainer.extend(cl(**args))
        elif key == "ExponentialShift":
            cl = getattr(extensions, key)
            attr = ext['attr']
            rate = ext['rate']
            name = ext['name']
            numbers = [int(num) for num in ext['numbers']]
            trainer.extend(cl(attr, rate),
                           trigger=triggers.ManualScheduleTrigger(
                               numbers, name))

    return trainer
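
A hypothetical config entry (invented values) that the `ExponentialShift` branch above would consume:

config = {
    'ExponentialShift': {
        'attr': 'lr',
        'rate': 0.1,
        'name': 'iteration',         # the unit handed to ManualScheduleTrigger
        'numbers': [80000, 100000],  # points where `lr` is multiplied by `rate`
    },
}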