Example #1
def compare_loss_scaling_by_nonzero(net,
                                    cfgs,
                                    init_scale=1,
                                    dataset='mnist',
                                    verbose=False,
                                    sample_iterations=None,
                                    includes=None,
                                    manual_seed=0,
                                    device=-1,
                                    dtype=chainer.mixed16,
                                    n_epoch=10,
                                    learnrate=0.01):
    """ Collect the number of nonzero at various points during
        training and see their relationship with the loss scaling
        method.

        The model should be provided.

        Different loss scaling method is specified by cfgs (a list)
    """
    # history
    hists = []

    with chainer.using_config('dtype', dtype):
        # create loss scaled model
        for i, cfg in enumerate(cfgs):
            net_ = net.copy(mode='copy')  # deep-copy the original link
            net_ = AdaLossScaled(net_,
                                 init_scale=init_scale,
                                 cfg=cfg,
                                 verbose=verbose)
            # prepare the hook that records necessary values
            hook = AdaLossMonitor(sample_iterations=sample_iterations,
                                  includes=includes,
                                  verbose=verbose)
            # collect data
            utils.set_random_seed(manual_seed, device=device)
            if dataset == 'mnist':
                hooks, log = train_utils.train_model_on_mnist(
                    net_,
                    epoch=n_epoch,
                    batchsize=128,
                    device=device,
                    learnrate=learnrate,
                    hooks=[hook])
            else:
                raise ValueError('dataset name not found: {}'.format(dataset))

            # collect the sampled results for this configuration
            hists.append(hooks[0].export_history())

    return hists
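A hedged usage sketch of the comparison above, assuming net is a Chainer link: 'approx_range' is taken from Example #5, while 'fixed' is a hypothetical alternative shown only for illustration.

# Usage sketch: compare two loss-scaling configurations on MNIST.
cfgs = [
    {'loss_scale_method': 'approx_range'},
    {'loss_scale_method': 'fixed'},  # hypothetical method name
]
hists = compare_loss_scaling_by_nonzero(net, cfgs,
                                        init_scale=16,
                                        dataset='mnist',
                                        device=0,  # -1 for CPU
                                        n_epoch=2)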
Example #2
def set_random_seed(args, device):
    # Set up random seed
    if args.manual_seed is not None:
        utils.set_random_seed(args.manual_seed, device=device)
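A minimal sketch of the argparse wiring this helper assumes (the flag name is an assumption):

import argparse

parser = argparse.ArgumentParser()
# args.manual_seed defaults to None, so seeding is skipped unless requested
parser.add_argument('--manual-seed', type=int, default=None)
args = parser.parse_args()
set_random_seed(args, device=-1)  # device=-1 selects the CPU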
Example #3
def train_model_on_cifar(net,
                         dataset='cifar10',
                         n_epoch=164,
                         batchsize=128,
                         device=-1,
                         learnrate=0.1,
                         lr_decay=0.1,
                         schedule=None,
                         weight_decay=1e-4,
                         manual_seed=None,
                         warmup_attr_ratio=None,
                         warmup_n_epoch=None,
                         cleanup=True,
                         tmpdir=None,
                         recorder=None,
                         hooks=None):
    """ Train a model on the cifar dataset """
    # Mean and Std
    _mean = np.array([0.4914, 0.4822, 0.4465],
                     dtype=chainer.get_dtype()).reshape([3, 1, 1])
    _std = np.array([0.2023, 0.1994, 0.2010],
                    dtype=chainer.get_dtype()).reshape([3, 1, 1])
    # Set up random seed
    if manual_seed is not None:
        utils.set_random_seed(manual_seed, device=device)

    if dataset == 'cifar10':
        print('Using CIFAR10 dataset.')
        class_labels = 10
        train, test = chainer.datasets.get_cifar10()
        mean = _mean
        std = _std
    elif dataset == 'cifar100':
        print('Using CIFAR100 dataset.')
        class_labels = 100
        train, test = chainer.datasets.get_cifar100()
        mean = np.array([0.5071, 0.4867, 0.4408],
                        dtype=chainer.get_dtype()).reshape([3, 1, 1])
        std = np.array([0.2675, 0.2565, 0.2761],
                       dtype=chainer.get_dtype()).reshape([3, 1, 1])
    else:
        raise RuntimeError('Invalid dataset choice.')

    train = PreprocessCIFARTrainData(train, mean=mean, std=std)
    test = PreprocessCIFARTestData(test, mean=mean, std=std)
    train_iter = chainer.iterators.SerialIterator(train, batchsize)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # Model initialisation
    model = L.Classifier(net)
    model.to_device(device)

    # Create optimizer
    # NOTE: here the momentum is 0.9 by default
    if warmup_attr_ratio is not None:
        learnrate *= warmup_attr_ratio
    optimizer = chainer.optimizers.MomentumSGD(learnrate)

    if chainer.get_dtype() == chainer.mixed16:
        print('==> Using FP32 update for dtype=mixed16')
        optimizer.use_fp32_update()  # by default use fp32 update
        # TODO: loss scaling

    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer_hooks.WeightDecay(weight_decay))

    # Setting up the trigger for stopping training
    stop_trigger = (n_epoch, 'epoch')

    # Set up a trainer
    if tmpdir is None:
        tmpdir = '/tmp'
    out = tempfile.mkdtemp(prefix='{}_train-'.format(dataset), dir=tmpdir)
    updater = training.updaters.StandardUpdater(train_iter,
                                                optimizer,
                                                device=device)
    trainer = training.Trainer(updater, stop_trigger, out=out)

    if recorder is not None:
        recorder.setup(trainer)

    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.Evaluator(test_iter, model, device=device))
    trainer.extend(extensions.observe_lr())
    trainer.extend(
        extensions.PrintReport([
            'epoch',
            'lr',
            'main/loss',
            'validation/main/loss',
            'main/accuracy',
            'validation/main/accuracy',
            'elapsed_time',
        ]))
    trainer.extend(extensions.snapshot(
        filename='snapshot_epoch_{.updater.epoch}', snapshot_on_error=True),
                   trigger=(1, 'epoch'))
    lr_shift = chainerlp.extensions.ExponentialShift(
        'lr',
        lr_decay,
        warmup_attr_ratio=warmup_attr_ratio,
        warmup_n_epoch=warmup_n_epoch,
        schedule=schedule)
    trainer.extend(lr_shift, trigger=(1, 'epoch'))
    trainer.extend(extensions.ProgressBar())

    # RUN
    if hooks is None:
        hooks = []
    with ExitStack() as stack:
        for hook in hooks:
            if hasattr(hook, 'trainer'):
                hook.trainer = trainer  # patch the hooks
            stack.enter_context(hook)
        trainer.run()
    log = notebook_utils.load_train_log(train_dir=out)

    if cleanup:
        print('==> Cleaning up {} ...'.format(out))
        shutil.rmtree(out)

    return hooks, log
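A hedged usage sketch: ResNetCIFAR and the schedule values come from Example #5, while the depth of 20 is illustrative.

# Sketch: train a small ResNet on CIFAR-10 with the schedule from
# Example #5. ResNetCIFAR is assumed to be importable from chainerlp.
net = ResNetCIFAR(20, n_class=10)
hooks, log = train_model_on_cifar(net,
                                  dataset='cifar10',
                                  n_epoch=164,
                                  schedule=[81, 122],  # lr decay epochs
                                  lr_decay=0.1,
                                  device=0)
# `log` is loaded from Chainer's LogReport output, so (assuming a list
# of per-epoch dicts) the final validation accuracy is:
print(log[-1]['validation/main/accuracy'])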
Example #4
import unittest
import tempfile
import pickle
import numpy as np
import chainer
import chainer.functions as F  # for testing purposes
import chainer.links as L
from chainer import testing
from chainer import Function, FunctionNode, gradient_check, report, training, Variable
from chainer import datasets, initializers, iterators, optimizers, serializers
from chainer import Link, Chain, ChainList
from chainer.datasets import mnist  # for trainer test

from chainerlp import utils
from chainerlp.hooks.act_stat_hook import ActStatFuncHook

utils.set_random_seed(0)


class TestActStatHook(unittest.TestCase):
    def test_forward(self):
        """ ActStatFuncHook should work properly for the forward pass
            of a model. Properly means the input data should be correctly
            collected.
        """
        hook = ActStatFuncHook()
        with hook:
            data = np.random.random((3, 3)).astype(np.float32) - 0.5
            x = chainer.Variable(data)
            _ = F.relu(x)

        self.assertEqual(len(hook.call_history), 1)  # one history recorded
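Since the hook records one entry per function call, chained calls should produce multiple entries; a quick sketch under that assumption:

# Each forward function call appends one entry to call_history.
hook = ActStatFuncHook()
with hook:
    x = chainer.Variable(np.random.random((2, 4)).astype(np.float32))
    _ = F.relu(F.relu(x))  # two ReLU calls
print(len(hook.call_history))  # expected: 2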
Example #5
def train(n_layer,
          init_scale=1,
          scale_upper_bound=128,
          accum_upper_bound=4096,
          method='approx_range',
          update_per_n_iteration=1,
          warmup_attr_ratio=None,
          warmup_n_epoch=None,
          n_class=10,
          manual_seed=0,
          train_batch=128,
          device=-1,
          learnrate=0.1):
    """ Train function """
    utils.set_random_seed(manual_seed, device=device)

    # Recorder for loss scale values
    recorder = AdaLossRecorder(sample_per_n_iter=100)

    with chainer.using_config('dtype', chainer.mixed16):
        if n_layer == 16 or n_layer == 19:
            net_ = VGGNetCIFAR(n_layer, n_class=n_class)
        elif n_layer == 164:
            net_ = ResNetCIFARv2(n_layer, n_class=n_class)
        else:
            net_ = ResNetCIFAR(n_layer, n_class=n_class)

        net = AdaLossScaled(
            net_,
            init_scale=init_scale,
            cfg={
                'loss_scale_method': method,
                'scale_upper_bound': scale_upper_bound,
                'accum_upper_bound': accum_upper_bound,
                'recorder': recorder,
                'update_per_n_iteration': update_per_n_iteration,
                'n_uf_threshold': 1e-3,
            },
            transforms=[
                transforms.AdaLossTransformLinear(),
                transforms.AdaLossTransformConvolution2D(),
                transforms.AdaLossTransformBatchNormalization(),
                # customized transform for chainerlp models
                chainerlp_transforms.AdaLossTransformConv2DBNActiv(),
                chainerlp_transforms.AdaLossTransformBasicBlock(),
                chainerlp_transforms.AdaLossTransformBNActivConv2D(),
                chainerlp_transforms.AdaLossTransformBottleneckv2(),
            ],
            verbose=True)

        hook = AdaLossMonitor(sample_per_n_iter=100,
                              verbose=False,
                              includes=['Grad', 'Deconvolution'])
        utils.set_random_seed(manual_seed, device=device)
        hooks, log = train_utils.train_model_on_cifar(
            net,
            dataset='cifar{}'.format(n_class),
            learnrate=learnrate,
            batchsize=train_batch,
            device=device,
            schedule=[81, 122],
            warmup_attr_ratio=warmup_attr_ratio,
            warmup_n_epoch=warmup_n_epoch,
            hooks=[hook],
            recorder=recorder)

    # post processing
    grad_stats = hooks[0].export_history()
    loss_scale = recorder.export()

    return grad_stats, loss_scale, log
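An illustrative driver for the function above: 'approx_range' and the depth 164 appear in the code itself, while the scale and warmup values are assumptions.

# Illustrative call: 164-layer pre-activation ResNet on CIFAR-10 with
# adaptive loss scaling. grad_stats comes from the AdaLossMonitor hook,
# loss_scale from the AdaLossRecorder.
grad_stats, loss_scale, log = train(164,
                                    init_scale=16,
                                    method='approx_range',
                                    warmup_attr_ratio=0.1,
                                    warmup_n_epoch=5,
                                    device=0)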