Пример #1
0
    def test_update_with_gpu(self):
        """Multi-node optimizer on GPU: the wrapped optimizer must not step
        before gradients exist, and gradients must be averaged across ranks."""
        self.setup_gpu()
        self.optimizer = chainermn.create_multi_node_optimizer(
            self.actual_optimizer, self.comm)
        self.optimizer.setup(self.target)
        # First update runs before any gradient is filled in; the underlying
        # optimizer's step counter must stay at 0.
        self.optimizer.update()
        self.assertEqual(self.actual_optimizer.t, 0)
        # Give each rank a distinct gradient per link so the post-allreduce
        # average is easy to predict below.
        self.optimizer.target.a.W.grad[:] = self.comm.rank
        self.optimizer.target.b.W.grad[:] = self.comm.rank + 1
        self.optimizer.target.c.W.grad[:] = self.comm.rank + 2

        # Now a real update must happen exactly once per parameter.
        self.optimizer.update()
        self.assertEqual(self.actual_optimizer.t, 1)
        self.optimizer.target.a.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.a.W)
        self.optimizer.target.b.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.b.W)
        self.optimizer.target.c.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.c.W)

        # Mean of ranks 0..size-1 is (size - 1) / 2; each link's averaged
        # gradient is that mean plus the per-link offset used above.
        base = (self.comm.size - 1.0) / 2
        chainer.testing.assert_allclose(self.optimizer.target.a.W.grad,
                                        (base + 0) * np.ones((3, 2)))
        chainer.testing.assert_allclose(self.optimizer.target.b.W.grad,
                                        (base + 1) * np.ones((4, 3)))
        chainer.testing.assert_allclose(self.optimizer.target.c.W.grad,
                                        (base + 2) * np.ones((5, 4)))
Пример #2
0
    def __init__(
            self,
            model_parameters,
            # Learning rate at training step s with annealing
            initial_lr=1e-4,
            final_lr=1e-5,
            annealing_steps=1600000,
            # Learning rate as used by the Adam algorithm
            beta_1=0.9,
            beta_2=0.99,
            # Adam regularisation parameter
            eps=1e-8,
            initial_training_step=0,
            communicator=None):
        """Adam optimizer whose learning rate is annealed from *initial_lr*
        to *final_lr* over *annealing_steps* steps.

        Pass a ChainerMN *communicator* to also build a multi-node wrapper
        (exposed as ``self.multi_node_optimizer``, ``None`` otherwise).
        """
        # Annealing-schedule parameters.
        self.initial_lr, self.final_lr = initial_lr, final_lr
        self.annealing_steps = annealing_steps
        # Adam hyper-parameters.
        self.beta_1, self.beta_2, self.eps = beta_1, beta_2, eps

        # Resume the schedule at the given training step.
        starting_lr = self.compute_lr_at_step(initial_training_step)
        adam = optimizers.Adam(starting_lr,
                               beta1=beta_1,
                               beta2=beta_2,
                               eps=eps)
        adam.setup(model_parameters)
        self.optimizer = adam

        # Distributed wrapper only when a communicator was supplied.
        self.multi_node_optimizer = (
            chainermn.create_multi_node_optimizer(self.optimizer, communicator)
            if communicator else None)
    def __init__(
            self,
            model_parameters,
            # Learning rate at training step s with annealing
            mu_i=5.0 * 1e-4,
            mu_f=5.0 * 1e-5,
            n=1.6 * 1e6,
            # Learning rate as used by the Adam algorithm
            beta_1=0.9,
            beta_2=0.99,
            # Adam regularisation parameter
            eps=1e-8,
            communicator=None):
        """Adam optimizer with a learning rate annealed from *mu_i* to
        *mu_f* over *n* steps (schedule evaluated by ``self.mu_s``).

        A ChainerMN *communicator* enables the multi-node wrapper stored
        in ``self.multi_node_optimizer`` (``None`` without one).
        """
        # Annealing-schedule parameters.
        self.mu_i, self.mu_f, self.n = mu_i, mu_f, n
        # Adam hyper-parameters.
        self.beta_1, self.beta_2, self.eps = beta_1, beta_2, eps

        # Learning rate at step 0 of the schedule.
        initial_rate = self.mu_s(0)
        adam = optimizers.Adam(initial_rate,
                               beta1=beta_1,
                               beta2=beta_2,
                               eps=eps)
        adam.setup(model_parameters)
        self.optimizer = adam

        # Distributed wrapper only when a communicator was supplied.
        self.multi_node_optimizer = (
            chainermn.create_multi_node_optimizer(self.optimizer, communicator)
            if communicator else None)
    def test_update_with_gpu(self):
        """Verify multi-node optimizer behaviour on GPU: no step without
        gradients, then exactly one step with rank-averaged gradients."""
        self.setup_gpu()
        self.optimizer = chainermn.create_multi_node_optimizer(
            self.actual_optimizer, self.comm)
        self.optimizer.setup(self.target)
        # No gradients yet: the wrapped optimizer must not advance.
        self.optimizer.update()
        self.assertEqual(self.actual_optimizer.t, 0)
        # Rank-dependent gradients make the allreduce average predictable.
        self.optimizer.target.a.W.grad[:] = self.comm.rank
        self.optimizer.target.b.W.grad[:] = self.comm.rank + 1
        self.optimizer.target.c.W.grad[:] = self.comm.rank + 2

        # This update must step once and touch each parameter exactly once.
        self.optimizer.update()
        self.assertEqual(self.actual_optimizer.t, 1)
        self.optimizer.target.a.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.a.W)
        self.optimizer.target.b.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.b.W)
        self.optimizer.target.c.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.c.W)

        # Average of ranks 0..size-1 is (size - 1) / 2.
        base = (self.comm.size - 1.0) / 2
        chainer.testing.assert_allclose(self.optimizer.target.a.W.grad,
                                        (base + 0) * np.ones((3, 2)))
        chainer.testing.assert_allclose(self.optimizer.target.b.W.grad,
                                        (base + 1) * np.ones((4, 3)))
        chainer.testing.assert_allclose(self.optimizer.target.c.W.grad,
                                        (base + 2) * np.ones((5, 4)))
Пример #5
0
    def __init__(
            self,
            model_parameters,
            # Learning rate at training step s with annealing
            lr_i=1.0 * 1e-4,
            lr_f=1.0 * 1e-5,
            n=10000,
            # Learning rate as used by the Adam algorithm
            beta_1=0.9,
            beta_2=0.99,
            # Adam regularisation parameter
            eps=1e-8,
            communicator=None):
        """Eve optimizer with a learning rate annealed from *lr_i* to
        *lr_f* over *n* steps (schedule handled by the base class).

        Pass a ChainerMN *communicator* to enable the multi-node wrapper
        stored in ``self.multi_node_optimizer`` (``None`` otherwise).
        """
        super().__init__(lr_i, lr_f, n)
        self.beta_1 = beta_1
        self.beta_2 = beta_2
        self.eps = eps

        # Learning rate at step 0 of the schedule.
        lr = self.mu_s(0)
        self.optimizer = Eve(lr, beta1=beta_1, beta2=beta_2, eps=eps)
        self.optimizer.setup(model_parameters)

        # Fix: always define the attribute so single-node callers reading
        # self.multi_node_optimizer don't hit AttributeError (matches the
        # sibling optimizer classes in this file).
        self.multi_node_optimizer = None
        if communicator:
            self.multi_node_optimizer = chainermn.create_multi_node_optimizer(
                self.optimizer, communicator)
Пример #6
0
 def setup_optimizer(self, alpha=0.0005):
     """Create an Adam optimizer (multi-node when ``self.comm`` is set)
     and attach it to this link via ``setup``."""
     opt = optimizers.Adam(alpha)
     if self.comm is not None:
         # Distributed run: average gradients across all workers.
         opt = chainermn.create_multi_node_optimizer(opt, self.comm)
     self.optimizer = opt
     self.optimizer.setup(self)
Пример #7
0
    def setup_mnist_trainer(self, display_log=False):
        """Build the pieces of a distributed MNIST training run.

        Returns ``(updater, optimizer, train_iter, test_iter, model)``.
        """
        batchsize = 100
        n_units = 100

        comm = self.communicator
        model = L.Classifier(MLP(n_units, 10))

        optimizer = chainermn.create_multi_node_optimizer(
            chainer.optimizers.Adam(), comm)
        optimizer.setup(model)

        # Only rank 0 loads the dataset; every rank then receives its own
        # shard through scatter_dataset.
        train = test = None
        if comm.rank == 0:
            train, test = chainer.datasets.get_mnist()

        train = chainermn.scatter_dataset(train, comm, shuffle=True)
        test = chainermn.scatter_dataset(test, comm, shuffle=True)

        train_iter = chainer.iterators.SerialIterator(train, batchsize)
        test_iter = chainer.iterators.SerialIterator(
            test, batchsize, repeat=False, shuffle=False)

        updater = training.StandardUpdater(train_iter, optimizer)

        return updater, optimizer, train_iter, test_iter, model
Пример #8
0
def main(args, model, x, t, valid_rate=0.2):
    """Train *model* on (x, t) across MPI nodes; rank 0 saves the result.

    *valid_rate* is the fraction of samples held out for validation; the
    split is done on rank 0 before the data is scattered to all workers.
    """
    print('Start a training script using multiple nodes.')

    comm = chainermn.create_communicator(args.communicator)
    # One GPU per process, selected by the intra-node rank.
    device = comm.intra_rank
    assert device >= 0, 'invalid device ID: {}'.format(device)

    if comm.mpi_comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))
        print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    # Rank 0 builds the train/validation split; other ranks get shards
    # from scatter_dataset below.
    if comm.rank == 0:
        threshold = int(len(t) * (1 - valid_rate))
        train = datasets.tuple_dataset.TupleDataset(x[0:threshold],
                                                    t[0:threshold])
        valid = datasets.tuple_dataset.TupleDataset(x[threshold:],
                                                    t[threshold:])
        datasize = len(train) * args.epoch
    else:
        train, valid = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    valid = chainermn.scatter_dataset(valid, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    valid_iter = chainer.iterators.SerialIterator(valid,
                                                  args.batchsize,
                                                  repeat=False,
                                                  shuffle=False)

    if device >= 0:
        cuda.get_device_from_id(device).use()
        model.to_gpu()

    # The multi-node optimizer averages gradients across workers.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.SGD(lr=2e-4), comm)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.WeightDecay(1e-2))

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Wrap the evaluator so validation results are aggregated over ranks.
    evaluator = extensions.Evaluator(valid_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)

    prepare_extensions(trainer, evaluator, args, comm)

    trainer.run()

    # Only rank 0 reports throughput and persists the trained model.
    if comm.rank == 0:
        throughput = datasize / trainer.elapsed_time
        print('Throughput: {} [images/sec.] ({} / {})'.format(
            throughput, datasize, trainer.elapsed_time))

        model_filepath = os.path.join(args.out, 'trained.model')
        chainer.serializers.save_npz(model_filepath, model)
Пример #9
0
 def make_optimizer(model, comm, alpha=0.0002, beta1=0.5):
     """Return a weight-decayed Adam optimizer wrapped for multi-node use."""
     adam = chainer.optimizers.Adam(alpha=alpha, beta1=beta1)
     # The ChainerMN wrapper averages gradients over the communicator.
     multi_node_opt = chainermn.create_multi_node_optimizer(adam, comm)
     multi_node_opt.setup(model)
     multi_node_opt.add_hook(chainer.optimizer.WeightDecay(0.0001), 'hook_dec')
     return multi_node_opt
Пример #10
0
 def make_optimizer(model, comm, alpha=0.0002, beta1=0.5):
     """Build an Adam optimizer for *model*, distributed over *comm*,
     with a weight-decay hook attached."""
     base_opt = chainer.optimizers.Adam(alpha=alpha, beta1=beta1)
     # Wrap the plain optimizer so gradients are allreduced across ranks.
     wrapped = chainermn.create_multi_node_optimizer(base_opt, comm)
     wrapped.setup(model)
     wrapped.add_hook(chainer.optimizer.WeightDecay(0.0001), 'hook_dec')
     return wrapped
Пример #11
0
    def setup_mnist_trainer(self, display_log=False, use_chx=False):
        """Assemble updater, optimizer, iterators and model for MNIST.

        *use_chx* selects a ChainerX device for the model.  Returns
        ``(updater, optimizer, train_iter, test_iter, model)``.
        """
        batchsize = 100
        n_units = 100

        comm = self.communicator
        model = L.Classifier(MLP(n_units, 10))

        model.to_device(get_device(None, use_chx))

        optimizer = chainermn.create_multi_node_optimizer(
            chainer.optimizers.Adam(), comm)
        optimizer.setup(model)

        # Rank 0 loads MNIST; all ranks then receive their shard.
        train = test = None
        if comm.rank == 0:
            train, test = chainer.datasets.get_mnist()

        train = chainermn.scatter_dataset(train, comm, shuffle=True)
        test = chainermn.scatter_dataset(test, comm, shuffle=True)

        train_iter = chainer.iterators.SerialIterator(train, batchsize)
        test_iter = chainer.iterators.SerialIterator(
            test, batchsize, repeat=False, shuffle=False)

        updater = training.StandardUpdater(train_iter, optimizer)

        return updater, optimizer, train_iter, test_iter, model
Пример #12
0
def _prepare_multinode_snapshot(n, result):
    """Run *n* updater iterations under a multi-node snapshot extension.

    Returns ``(updater, mn_snapshot, trainer)`` so tests can inspect or
    resume the snapshot state written under the *result* directory.
    """
    n_units = 100
    batchsize = 10
    comm = create_communicator('naive')
    model = L.Classifier(MLP(n_units, 10))
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Rank 0 loads MNIST; other ranks receive shards via scatter_dataset.
    if comm.rank == 0:
        train, _ = chainer.datasets.get_mnist()
    else:
        train, _ = None, None

    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    train_iter = chainer.iterators.SerialIterator(train, batchsize)

    updater = StandardUpdater(train_iter, optimizer)
    trainer = Trainer(updater, out=result)

    # autoload=True lets a restarted run resume from the stored snapshot.
    snapshot = extensions.snapshot(target=updater, autoload=True)
    # NOTE(review): empty replica_sets presumably means no snapshot
    # replication across ranks -- confirm against multi_node_snapshot docs.
    replica_sets = []
    mn_snapshot = multi_node_snapshot(comm, snapshot, replica_sets)
    mn_snapshot.initialize(trainer)
    for _ in range(n):
        updater.update()

    return updater, mn_snapshot, trainer
Пример #13
0
def objective(trial, comm):
    """Optuna objective with pruning: train a trial-sampled MNIST model
    across MPI workers and return its validation accuracy."""
    # Sample an architecture.
    model = L.Classifier(create_model(trial))

    # Setup optimizer.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)

    # Setup dataset and iterator. Only worker 0 loads the whole dataset.
    # The dataset of worker 0 is evenly split and distributed to all workers.
    if comm.rank == 0:
        train, valid = chainer.datasets.get_mnist()
        # Fixed seed so every trial sees the same subsample.
        rng = np.random.RandomState(0)
        train = chainer.datasets.SubDataset(
            train, 0, N_TRAIN_EXAMPLES, order=rng.permutation(len(train))
        )
        valid = chainer.datasets.SubDataset(
            valid, 0, N_VALID_EXAMPLES, order=rng.permutation(len(valid))
        )
    else:
        train, valid = None, None

    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    valid = chainermn.scatter_dataset(valid, comm)

    train_iter = chainer.iterators.SerialIterator(train, BATCHSIZE, shuffle=True)
    valid_iter = chainer.iterators.SerialIterator(valid, BATCHSIZE, repeat=False, shuffle=False)

    # Setup trainer.
    updater = chainer.training.StandardUpdater(train_iter, optimizer)
    trainer = chainer.training.Trainer(updater, (EPOCH, "epoch"))

    # Add Chainer extension for pruners.
    trainer.extend(
        optuna.integration.ChainerPruningExtension(
            trial, "validation/main/accuracy", (PRUNER_INTERVAL, "epoch")
        )
    )
    # Validation accuracy must be aggregated over ranks for the pruner.
    evaluator = chainer.training.extensions.Evaluator(valid_iter, model)
    trainer.extend(chainermn.create_multi_node_evaluator(evaluator, comm))
    log_report_extension = chainer.training.extensions.LogReport(log_name=None)
    trainer.extend(log_report_extension)

    # Progress output only on rank 0 to avoid duplicated lines.
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.ProgressBar())

    # Run training.
    # Please set show_loop_exception_msg False to inhibit messages about TrialPruned exception.
    # ChainerPruningExtension raises TrialPruned exception to stop training, and
    # trainer shows some messages every time it receive TrialPruned.
    trainer.run(show_loop_exception_msg=False)

    # Evaluate.
    evaluator = chainer.training.extensions.Evaluator(valid_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    report = evaluator()

    return report["main/accuracy"]
def run_test_observation_aggregator(comm, xp, use_chainer_variable,
                                    communicate_interval, use_gpu):
    """Exercise ObservationAggregator: each rank reports its own rank as an
    observation and the aggregated value must equal the mean over ranks."""
    model = DummyChain()

    if use_gpu:
        # Use CuPy's Device class to force call cudaSetDevice()
        chainer.cuda.get_device_from_id(comm.intra_rank).use()

    device = get_device(comm.intra_rank if use_gpu else None, xp == chainerx)

    # ChainerX has no random module matching NumPy/CuPy; generate with
    # NumPy and convert.
    if xp == chainerx:
        train = xp.array(np.random.rand(10, 1).astype(np.float32))
    else:
        train = xp.random.rand(10, 1).astype(np.float32)

    model.to_device(device)

    train_iter = chainer.iterators.SerialIterator(train,
                                                  batch_size=1,
                                                  repeat=True,
                                                  shuffle=True)

    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    updater = chainer.training.StandardUpdater(train_iter,
                                               optimizer,
                                               device=device)

    trainer = chainer.training.Trainer(updater, (1, 'epoch'))

    # Writes this rank's id into the observation every iteration; runs
    # before readers (PRIORITY_WRITER).
    @extension.make_extension(trigger=(1, 'iteration'),
                              priority=extension.PRIORITY_WRITER)
    def rank_reporter(trainer_):
        tmp = xp.asarray(comm.rank, dtype=np.float32)
        if use_chainer_variable:
            tmp = chainer.Variable(tmp)
        trainer_.observation['rank'] = tmp

    # Checks the aggregated observation after each communication round;
    # mean of ranks 0..size-1 is (size - 1) / 2.
    @extension.make_extension(trigger=(communicate_interval, 'iteration'),
                              priority=extension.PRIORITY_READER)
    def aggregated_rank_checker(trainer_):
        actual = trainer_.observation['rank-aggregated']
        if use_chainer_variable:
            actual = actual.data
        expected = (comm.size - 1) / 2
        chainer.testing.assert_allclose(actual, expected)

    trainer.extend(rank_reporter)
    trainer.extend(
        ObservationAggregator(comm,
                              'rank',
                              'rank-aggregated',
                              comm_trigger=(communicate_interval,
                                            'iteration')))
    trainer.extend(aggregated_rank_checker)

    trainer.run()
Пример #15
0
def make_optimizer(model, comm, config):
    """Instantiate the optimizer declared in *config* and wrap it for
    multi-node training over *comm*.

    Select from https://docs.chainer.org/en/stable/reference/optimizers.html;
    the order of the arguments for optimizers follows their definitions.
    """
    # Resolve the optimizer class and its arguments from the YAML config.
    algorithm = yaml_utils.load_optimizer(config.optimizer['algorithm'],
                                          args=config.optimizer['args'])
    multi_node_opt = chainermn.create_multi_node_optimizer(algorithm, comm)
    multi_node_opt.setup(model)
    return multi_node_opt
Пример #16
0
def make_adam(model, lr=0.0002, beta1=0.9, beta2=0.999):
    """Return an Adam optimizer bound to *model*, wrapped for multi-node
    training when ChainerMN is enabled via the global chainer config."""
    opt = chainer.optimizers.Adam(alpha=lr, beta1=beta1, beta2=beta2)

    # chainer.config.using_chainermn signals a distributed run; the
    # communicator to use is also stored on the config.
    if chainer.config.using_chainermn:
        opt = chainermn.create_multi_node_optimizer(
            opt, chainer.config.communicator)

    opt.setup(model)
    return opt
Пример #17
0
def objective(trial, comm):
    """Optuna objective: train a trial-sampled MNIST model across MPI
    workers and return ``1 - accuracy`` (minimised by the study)."""
    # Sample an architecture.
    model = L.Classifier(create_model(trial))

    # Setup optimizer.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)

    # Setup dataset and iterator. Only worker 0 loads the whole dataset.
    # The dataset of worker 0 is evenly split and distributed to all workers.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
        # Fixed seed so every trial subsamples the same examples.
        rng = np.random.RandomState(0)
        train = chainer.datasets.SubDataset(train,
                                            0,
                                            N_TRAIN_EXAMPLES,
                                            order=rng.permutation(len(train)))
        test = chainer.datasets.SubDataset(test,
                                           0,
                                           N_TEST_EXAMPLES,
                                           order=rng.permutation(len(test)))
    else:
        train, test = None, None

    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm)

    train_iter = chainer.iterators.SerialIterator(train,
                                                  BATCHSIZE,
                                                  shuffle=True)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 BATCHSIZE,
                                                 repeat=False,
                                                 shuffle=False)

    # Setup trainer.
    updater = chainer.training.StandardUpdater(train_iter, optimizer)
    trainer = chainer.training.Trainer(updater, (EPOCH, 'epoch'))

    # Progress output only on rank 0 to avoid duplicated lines.
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.ProgressBar())

    # Run training.
    trainer.run()

    # Evaluate.  The multi-node evaluator aggregates metrics over ranks.
    evaluator = chainer.training.extensions.Evaluator(test_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    report = evaluator()

    # The following line mitigates the memory problem in CircleCI
    # (see https://github.com/pfnet/optuna/pull/325 for more details).
    gc.collect()

    return 1.0 - report['main/accuracy']
Пример #18
0
 def test_can_create_valid_wrapper_for_chainermn(self):
     """The profiling wrapper should nest around a ChainerMN optimizer:
     wrapper -> multi-node optimizer -> raw SGD."""
     optimizer = create_marked_profile_optimizer(
         chainermn.create_multi_node_optimizer(optimizers.SGD(lr=1.0),
                                               None),
         sync=True)
     self.assertIsNotNone(optimizer)
     # Hyper-parameters of the innermost optimizer stay reachable.
     np.testing.assert_allclose([optimizer.lr], [1.0])
     self.assertIsInstance(optimizer, _MarkedProfileOptimizerForMN)
     # One level down is the multi-node wrapper (not a plain
     # chainer.Optimizer); the raw optimizer sits one level deeper.
     self.assertNotIsInstance(optimizer.actual_optimizer, chainer.Optimizer)
     self.assertIsInstance(optimizer.actual_optimizer.actual_optimizer,
                           chainer.Optimizer)
Пример #19
0
def make_optimizer(model, comm, alpha=0.001, beta1=0.9, beta2=0.999, chmn=False, add_decay=False):
    """Return an Adam optimizer for *model*; multi-node when *chmn* is
    true and with a weight-decay hook when *add_decay* is true."""
    optimizer = chainer.optimizers.Adam(alpha=alpha, beta1=beta1, beta2=beta2)
    if chmn:
        # 12/2018: problem in minoas, probably related with openmpi.
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)
    if add_decay:
        optimizer.add_hook(chainer.optimizer.WeightDecay(0.00001), 'hook_dec')
    return optimizer
Пример #20
0
def get_optimizer(args, comm, model):
    """Build the optimizer named by ``args.optimizer``, wrapped for
    multi-node training over *comm* and attached to *model*.

    Unknown names fall back to RMSprop.
    """
    # Lambdas keep construction (and the dlframeworks attribute lookup)
    # lazy, exactly as the original if/elif chain did.
    factories = {
        'momentum_sgd': lambda: chainer.optimizers.MomentumSGD(),
        'adam': lambda: chainer.optimizers.Adam(),
        'rmsprop_warmup':
            lambda: dlframeworks.chainer.optimizers.RMSpropWarmup(),
    }
    make = factories.get(args.optimizer, lambda: chainer.optimizers.RMSprop())
    actual_optimizer = make()
    optimizer = chainermn.create_multi_node_optimizer(actual_optimizer, comm)
    optimizer.setup(model)
    return optimizer
Пример #21
0
def objective(trial, comm):
    """Optuna objective: train a trial-sampled MNIST model across MPI
    workers and return its validation accuracy."""
    # Sample an architecture.
    model = L.Classifier(create_model(trial))

    # Setup optimizer.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)

    # Setup dataset and iterator. Only worker 0 loads the whole dataset.
    # The dataset of worker 0 is evenly split and distributed to all workers.
    if comm.rank == 0:
        train, valid = chainer.datasets.get_mnist()
        # Fixed seed so every trial subsamples the same examples.
        rng = np.random.RandomState(0)
        train = chainer.datasets.SubDataset(train,
                                            0,
                                            N_TRAIN_EXAMPLES,
                                            order=rng.permutation(len(train)))
        valid = chainer.datasets.SubDataset(valid,
                                            0,
                                            N_VALID_EXAMPLES,
                                            order=rng.permutation(len(valid)))
    else:
        train, valid = None, None

    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    valid = chainermn.scatter_dataset(valid, comm)

    train_iter = chainer.iterators.SerialIterator(train,
                                                  BATCHSIZE,
                                                  shuffle=True)
    valid_iter = chainer.iterators.SerialIterator(valid,
                                                  BATCHSIZE,
                                                  repeat=False,
                                                  shuffle=False)

    # Setup trainer.
    updater = chainer.training.StandardUpdater(train_iter, optimizer)
    trainer = chainer.training.Trainer(updater, (EPOCH, "epoch"))

    # Progress output only on rank 0 to avoid duplicated lines.
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.ProgressBar())

    # Run training.
    trainer.run()

    # Evaluate.  The multi-node evaluator aggregates metrics over ranks.
    evaluator = chainer.training.extensions.Evaluator(valid_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    report = evaluator()

    return report["main/accuracy"]
Пример #22
0
 def make_optimizer(self, model, alpha, beta1, beta2):
     """Create an Adam optimizer bound to *model*.

     When ``self.use_mpi`` is set, the optimizer is wrapped with a
     ChainerMN multi-node optimizer over ``self.comm``.  Returns the
     (possibly wrapped) optimizer.
     """
     # Fix: corrected the typo 'alpah' -> 'alpha' in the log message.
     self.print_log(
         'Use Adam Optimizer with alpha = {}, beta1 = {}, beta2 = {}'.
         format(alpha, beta1, beta2))
     optimizer = chainer.optimizers.Adam(alpha=alpha,
                                         beta1=beta1,
                                         beta2=beta2)
     if self.use_mpi:
         self.print_log('Use Optimizer with MPI')
         optimizer = chainermn.create_multi_node_optimizer(
             optimizer, self.comm)
     optimizer.setup(model)
     return optimizer
def objective(trial, comm):
    """Optuna objective (GPU variant): train a trial-sampled MNIST model
    across MPI workers, one GPU per process, and return its accuracy."""
    # One GPU per process, selected by the intra-node rank.
    device = comm.intra_rank
    chainer.cuda.get_device_from_id(device).use()

    # Sample an architecture.
    model = L.Classifier(create_model(trial))
    model.to_gpu()

    # Setup optimizer.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(model)
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)

    # Setup dataset and iterator. Only worker 0 loads the whole dataset.
    # The dataset of worker 0 is evenly split and distributed to all workers.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
        # Fixed seed so every trial subsamples the same examples.
        rng = np.random.RandomState(0)
        train = chainer.datasets.SubDataset(
            train, 0, N_TRAIN_EXAMPLES, order=rng.permutation(len(train)))
        test = chainer.datasets.SubDataset(
            test, 0, N_TEST_EXAMPLES, order=rng.permutation(len(test)))
    else:
        train, test = None, None

    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm)

    train_iter = chainer.iterators.SerialIterator(
        train, BATCHSIZE, shuffle=True)
    test_iter = chainer.iterators.SerialIterator(
        test, BATCHSIZE, repeat=False, shuffle=False)

    # Setup trainer.
    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=device)
    trainer = chainer.training.Trainer(updater, (EPOCH, 'epoch'))

    # Progress output only on rank 0 to avoid duplicated lines.
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.ProgressBar())

    # Run training.
    trainer.run()

    # Evaluate.  The multi-node evaluator aggregates metrics over ranks.
    evaluator = chainer.training.extensions.Evaluator(
        test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    report = evaluator()

    return report['main/accuracy']
Пример #24
0
    def test_mnist(self, display_log=True):
        """End-to-end multi-node MNIST run: trains for a few epochs with
        the 'naive' communicator and requires >= 0.95 validation accuracy."""
        # This test file is intended to be run on Travis-CI and
        # GPU is not used for now.
        epoch = 5
        batchsize = 100
        n_units = 100

        comm = chainermn.create_communicator('naive')
        model = L.Classifier(MLP(n_units, 10))
        optimizer = chainermn.create_multi_node_optimizer(
            chainer.optimizers.Adam(), comm)
        optimizer.setup(model)

        # Rank 0 loads MNIST; other ranks receive shards via scatter.
        if comm.rank == 0:
            train, test = chainer.datasets.get_mnist()
        else:
            train, test = None, None

        train = chainermn.scatter_dataset(train, comm)
        test = chainermn.scatter_dataset(test, comm)

        train_iter = chainer.iterators.SerialIterator(train, batchsize)
        test_iter = chainer.iterators.SerialIterator(test,
                                                     batchsize,
                                                     repeat=False,
                                                     shuffle=False)

        updater = training.StandardUpdater(train_iter, optimizer)
        trainer = training.Trainer(updater, (epoch, 'epoch'))

        # Wrap standard Chainer evaluators by MultiNodeEvaluator.
        evaluator = extensions.Evaluator(test_iter, model)
        evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
        trainer.extend(evaluator)

        # Some display and output extensions are necessary only for one worker.
        # (Otherwise, there would just be repeated outputs.)
        if comm.rank == 0 and display_log:
            trainer.extend(extensions.LogReport(trigger=(1, 'epoch')),
                           trigger=(1, 'epoch'))
            trainer.extend(extensions.PrintReport([
                'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
                'validation/main/accuracy', 'elapsed_time'
            ],
                                                  out=sys.stderr),
                           trigger=(1, 'epoch'))
        trainer.run()

        # Final sanity check on the converged model.
        err = evaluator()['validation/main/accuracy']
        self.assertGreaterEqual(err, 0.95)
    def test_update(self):
        """Double-buffering optimizer: gradient communication overlaps with
        computation, so the parameter update lags one update() behind."""
        self.setup_gpu()
        self.optimizer = chainermn.create_multi_node_optimizer(
            self.actual_optimizer, self.comm, double_buffering=True)
        # setup() must return the wrapper itself (fluent API).
        opt = self.optimizer.setup(self.target)
        assert opt is self.optimizer
        # No gradients yet: step counter must stay at 0.
        self.optimizer.update()
        self.assertEqual(self.actual_optimizer.t, 0)
        # Rank-dependent gradients make the allreduce average predictable.
        self.optimizer.target.a.W.grad[:] = self.comm.rank
        self.optimizer.target.b.W.grad[:] = self.comm.rank + 1
        self.optimizer.target.c.W.grad[:] = self.comm.rank + 2

        # With double buffering, this update only starts the allreduce; the
        # averaged result lands in communicated_target and t is still 0.
        self.optimizer.update()
        self.optimizer.wait()
        self.assertEqual(self.actual_optimizer.t, 0)
        # Mean of ranks 0..size-1 is (size - 1) / 2.
        base = (self.comm.size - 1.0) / 2
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.a.W.grad,
            (base + 0) * np.ones((3, 2)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.b.W.grad,
            (base + 1) * np.ones((4, 3)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.c.W.grad,
            (base + 2) * np.ones((5, 4)))

        self.optimizer.target.a.W.grad[:] = self.comm.rank + 3
        self.optimizer.target.b.W.grad[:] = self.comm.rank + 4
        self.optimizer.target.c.W.grad[:] = self.comm.rank + 5
        # The second real update applies the previous round's gradients
        # (t becomes 1) while communicating the new ones.
        self.optimizer.update()
        self.optimizer.wait()
        self.assertEqual(self.actual_optimizer.t, 1)
        self.optimizer.target.a.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.a.W)
        self.optimizer.target.b.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.b.W)
        self.optimizer.target.c.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.c.W)
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.a.W.grad,
            (base + 3) * np.ones((3, 2)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.b.W.grad,
            (base + 4) * np.ones((4, 3)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.c.W.grad,
            (base + 5) * np.ones((5, 4)))
        # barrier() requires before destructor of PureNcclCommunicator
        # because communication may not be finished.
        self.comm.mpi_comm.barrier()
    def test_update(self):
        """Check the double-buffering optimizer: updates lag one call
        behind while gradients are allreduced in the background."""
        self.setup_gpu()
        self.optimizer = chainermn.create_multi_node_optimizer(
            self.actual_optimizer, self.comm, double_buffering=True)
        # setup() must return the wrapper itself (fluent API).
        opt = self.optimizer.setup(self.target)
        assert opt is self.optimizer
        # No gradients yet: step counter must stay at 0.
        self.optimizer.update()
        self.assertEqual(self.actual_optimizer.t, 0)
        # Rank-dependent gradients make the allreduce average predictable.
        self.optimizer.target.a.W.grad[:] = self.comm.rank
        self.optimizer.target.b.W.grad[:] = self.comm.rank + 1
        self.optimizer.target.c.W.grad[:] = self.comm.rank + 2

        # First real update only communicates; t stays 0 and the averaged
        # gradients appear in communicated_target.
        self.optimizer.update()
        self.optimizer.wait()
        self.assertEqual(self.actual_optimizer.t, 0)
        # Mean of ranks 0..size-1 is (size - 1) / 2.
        base = (self.comm.size - 1.0) / 2
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.a.W.grad, (base + 0) * np.ones(
                (3, 2)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.b.W.grad, (base + 1) * np.ones(
                (4, 3)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.c.W.grad, (base + 2) * np.ones(
                (5, 4)))

        self.optimizer.target.a.W.grad[:] = self.comm.rank + 3
        self.optimizer.target.b.W.grad[:] = self.comm.rank + 4
        self.optimizer.target.c.W.grad[:] = self.comm.rank + 5
        # Second real update applies the previous round (t becomes 1) while
        # the new gradients are being communicated.
        self.optimizer.update()
        self.optimizer.wait()
        self.assertEqual(self.actual_optimizer.t, 1)
        self.optimizer.target.a.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.a.W)
        self.optimizer.target.b.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.b.W)
        self.optimizer.target.c.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.c.W)
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.a.W.grad, (base + 3) * np.ones(
                (3, 2)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.b.W.grad, (base + 4) * np.ones(
                (4, 3)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.c.W.grad, (base + 5) * np.ones(
                (5, 4)))
        # barrier() requires before destructor of PureNcclCommunicator
        # because communication may not be finished.
        self.comm.mpi_comm.barrier()
 def make_optimizer(model):
     """Create the optimizer named by ``args.optimizer`` and set it up.

     Looks the optimizer class up in the module-level ``optim`` mapping,
     optionally wraps it for multi-node training, and attaches *model*.

     Raises:
         ValueError: if ``args.optimizer`` is not a recognised name.
     """
     if args.optimizer in [
             'SGD', 'Momentum', 'CMomentum', 'AdaGrad', 'RMSprop',
             'NesterovAG', 'LBFGS'
     ]:
         optimizer = optim[args.optimizer](lr=args.learning_rate)
     elif args.optimizer in ['AdaDelta']:
         # AdaDelta takes no learning-rate hyper-parameter.
         optimizer = optim[args.optimizer]()
     elif args.optimizer in ['Adam', 'AdaBound', 'Eve']:
         optimizer = optim[args.optimizer](
             alpha=args.learning_rate, weight_decay_rate=args.weight_decay)
     else:
         # Previously an unrecognised name fell through every branch and
         # crashed with a confusing NameError below; fail fast instead.
         raise ValueError('Unknown optimizer: {}'.format(args.optimizer))
     if args.mpi:
         # Wrap before setup so gradients are all-reduced across workers.
         optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
     optimizer.setup(model)
     return optimizer
    def check_update(self, batched_copy):
        """Exercise the double-buffering multi-node optimizer end to end.

        Each rank writes distinct gradients; after all-reduce the
        communicated gradients must equal the mean over ranks, and the
        actual parameter update lags one iteration behind (double buffer).
        """
        self.setup(batched_copy)
        self.optimizer = chainermn.create_multi_node_optimizer(
            self.actual_optimizer, self.comm, double_buffering=True)
        # setup() is expected to return the wrapper itself.
        opt = self.optimizer.setup(self.target)
        assert opt is self.optimizer
        # No gradients communicated yet: update() must not advance t.
        self.optimizer.update()
        self.assertEqual(self.actual_optimizer.t, 0)
        self.optimizer.target.a.W.grad[:] = self.comm.rank
        self.optimizer.target.b.W.grad[:] = self.comm.rank + 1
        self.optimizer.target.c.W.grad[:] = self.comm.rank + 2

        # First update: gradients are communicated, but with double
        # buffering no parameter update happens yet (t stays 0).
        self.optimizer.update()
        self.optimizer.wait()
        self.assertEqual(self.actual_optimizer.t, 0)
        # Mean of ranks 0..size-1; each parameter adds its own offset.
        base = (self.comm.size - 1.0) / 2
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.a.W.grad,
            (base + 0) * np.ones((3, 2)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.b.W.grad,
            (base + 1) * np.ones((4, 3)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.c.W.grad,
            (base + 2) * np.ones((5, 4)))

        self.optimizer.target.a.W.grad[:] = self.comm.rank + 3
        self.optimizer.target.b.W.grad[:] = self.comm.rank + 4
        self.optimizer.target.c.W.grad[:] = self.comm.rank + 5
        # Second update consumes the buffered gradients: each update rule
        # fires exactly once and the time step advances to 1.
        self.optimizer.update()
        self.optimizer.wait()
        self.assertEqual(self.actual_optimizer.t, 1)
        self.optimizer.target.a.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.a.W)
        self.optimizer.target.b.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.b.W)
        self.optimizer.target.c.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.c.W)
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.a.W.grad,
            (base + 3) * np.ones((3, 2)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.b.W.grad,
            (base + 4) * np.ones((4, 3)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.c.W.grad,
            (base + 5) * np.ones((5, 4)))
        self.comm.finalize()
Пример #29
0
def objective(trial, comm):
    """Optuna objective: train a trial-sampled MLP on MNIST under MPI.

    Returns the validation error rate (1 - accuracy), which Optuna
    minimises.
    """
    # Sample an architecture.
    model = L.Classifier(create_model(trial))

    # Setup optimizer.  Wrap with the multi-node optimizer *before*
    # calling setup() so the wrapper's setup path runs (it broadcasts the
    # initial parameters and hooks gradient all-reduce into updates).
    # The original called setup() on the bare optimizer first, bypassing
    # the wrapper's setup entirely.
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)

    # Setup dataset and iterator.  A fixed seed ensures every rank carves
    # out the same subset before scattering.
    train, test = chainer.datasets.get_mnist()
    rng = np.random.RandomState(0)
    train = chainer.datasets.SubDataset(train,
                                        0,
                                        N_TRAIN_EXAMPLES,
                                        order=rng.permutation(len(train)))
    test = chainer.datasets.SubDataset(test,
                                       0,
                                       N_TEST_EXAMPLES,
                                       order=rng.permutation(len(test)))

    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm)

    train_iter = chainer.iterators.SerialIterator(train,
                                                  BATCHSIZE,
                                                  shuffle=True)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 BATCHSIZE,
                                                 repeat=False,
                                                 shuffle=False)

    # Setup trainer.
    updater = chainer.training.StandardUpdater(train_iter, optimizer)
    trainer = chainer.training.Trainer(updater, (EPOCH, 'epoch'))

    # Show progress on a single rank only to avoid duplicated output.
    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.ProgressBar())

    # Run training.
    trainer.run()

    # Evaluate.
    evaluator = chainer.training.extensions.Evaluator(test_iter, model)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    report = evaluator()

    return 1.0 - report['main/accuracy']
Пример #30
0
 def make_optimizer(self, model, alpha, beta1, beta2):
     """Create an Adam optimizer for *model*, multi-node when MPI is on.

     Args:
         model: the chainer link to attach the optimizer to.
         alpha: Adam step size.
         beta1: exponential decay rate of the first moment.
         beta2: exponential decay rate of the second moment.

     Returns:
         The configured optimizer (wrapped by chainermn when
         ``self.use_mpi`` is true).
     """
     # Fix the 'alpah' typo in the original log message.
     self.print_log(
         'Use Adam Optimizer with alpha = {}, beta1 = {}, beta2 = {}'.
         format(alpha, beta1, beta2))
     optimizer = chainer.optimizers.Adam(alpha=alpha,
                                         beta1=beta1,
                                         beta2=beta2)
     if self.use_mpi:
         self.print_log('Use Optimizer with MPI')
         optimizer = chainermn.create_multi_node_optimizer(
             optimizer, self.comm)
     optimizer.setup(model)
     # Clip the gradient norm at 5 to stabilise training.
     optimizer.add_hook(chainer.optimizer.GradientClipping(5))
     return optimizer
Пример #31
0
def run_test_observation_aggregator(comm, xp, use_chainer_variable,
                                    communicate_interval, use_cupy):
    """Check that ObservationAggregator averages an observation over ranks.

    Every iteration each rank reports its own rank under the key 'rank';
    every ``communicate_interval`` iterations the aggregator must publish
    'rank-aggregated' equal to the mean rank, (size - 1) / 2.
    """
    model = DummyChain()
    if use_cupy:
        model.to_gpu()
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Tiny random dataset; the training result itself is irrelevant here.
    train = xp.random.rand(10, 1).astype(np.float32)
    train_iter = chainer.iterators.SerialIterator(train,
                                                  batch_size=1,
                                                  repeat=True,
                                                  shuffle=True)

    updater = chainer.training.StandardUpdater(train_iter, optimizer)

    trainer = chainer.training.Trainer(updater, (1, 'epoch'))

    # PRIORITY_WRITER: runs before the aggregator reads the observation.
    @extension.make_extension(trigger=(1, 'iteration'),
                              priority=extension.PRIORITY_WRITER)
    def rank_reporter(trainer):
        tmp = xp.asarray(comm.rank, dtype=np.float32)
        if use_chainer_variable:
            tmp = chainer.Variable(tmp)
        trainer.observation['rank'] = tmp

    # PRIORITY_READER: runs after the aggregator has written its output.
    @extension.make_extension(trigger=(communicate_interval, 'iteration'),
                              priority=extension.PRIORITY_READER)
    def aggregated_rank_checker(trainer):
        actual = trainer.observation['rank-aggregated']
        if use_chainer_variable:
            actual = actual.data
        expected = (comm.size - 1) / 2
        chainer.testing.assert_allclose(actual, expected)

    trainer.extend(rank_reporter)
    trainer.extend(
        ObservationAggregator(comm,
                              'rank',
                              'rank-aggregated',
                              comm_trigger=(communicate_interval,
                                            'iteration')))
    trainer.extend(aggregated_rank_checker)

    trainer.run()
Пример #32
0
def train(x_data,
          t_data,
          batchsize=128,
          layer=1,
          in_units=1,
          hidden_units=5,
          out_units=1):
    """Train an LSTM on scattered data across MPI workers.

    Args:
        x_data: training dataset (scattered over workers).
        t_data: validation dataset (scattered over workers).
        batchsize: mini-batch size for both iterators.
        layer: unused here; kept for interface compatibility —
            presumably intended for LSTM depth (TODO confirm with callers).
        in_units / hidden_units / out_units: LSTM layer sizes.
    """
    comm = chainermn.create_communicator('naive')

    # Scatter both datasets so every worker trains on its own shard.
    # (Removed the original no-op ``batchsize = batchsize``.)
    x_data = chainermn.scatter_dataset(x_data, comm)
    t_data = chainermn.scatter_dataset(t_data, comm)
    train_iter = iterators.SerialIterator(x_data, batchsize)
    test_iter = iterators.SerialIterator(t_data,
                                         batchsize,
                                         repeat=False,
                                         shuffle=False)

    # setup model
    model = LSTM(in_units, hidden_units, out_units)

    # setup optimizer: gradients are all-reduced across workers.
    optimizer = chainermn.create_multi_node_optimizer(optimizers.Adam(), comm)
    optimizer.setup(model)

    updater = training.StandardUpdater(train_iter, optimizer, MyConverter)
    trainer = training.Trainer(updater, (20, 'epoch'), out='result')
    # Reporting extensions run on rank 0 only to avoid duplicated output.
    if comm.rank == 0:
        trainer.extend(extensions.LogReport())
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.observe_lr())
        trainer.extend(extensions.Evaluator(test_iter, model, MyConverter),
                       name='val')
        trainer.extend(
            extensions.PrintReport(
                ['epoch', 'main/loss', 'val/main/loss', 'elapsed_time', 'lr']))
        trainer.extend(
            extensions.PlotReport(['main/loss', 'val/main/loss'],
                                  x_key='epoch',
                                  file_name='loss.png'))

    trainer.run()
def _setup_optimizer(config, model, comm):
    """Build an optimizer from *config* and attach it to *model*.

    Args:
        config: mapping with 'optimizer', 'init_lr' and 'weight_decay'.
        model: chainer link to optimise.
        comm: chainermn communicator, or None for single-node training.

    Returns:
        The configured (and possibly multi-node wrapped) optimizer.

    Raises:
        ValueError: if the optimizer name is not recognised.
    """
    optimizer_name = config['optimizer']
    lr = float(config['init_lr'])
    weight_decay = float(config['weight_decay'])
    if optimizer_name == 'Adam':
        # Adam carries its own decoupled weight-decay hyper-parameter.
        optimizer = Adam(alpha=lr, weight_decay_rate=weight_decay)
    elif optimizer_name in \
            ('SGD', 'MomentumSGD', 'CorrectedMomentumSGD', 'RMSprop'):
        # Explicit lookup instead of eval() on a config-supplied string:
        # same four classes, no arbitrary code execution.
        sgd_like = {
            'SGD': SGD,
            'MomentumSGD': MomentumSGD,
            'CorrectedMomentumSGD': CorrectedMomentumSGD,
            'RMSprop': RMSprop,
        }
        optimizer = sgd_like[optimizer_name](lr=lr)
        if weight_decay > 0.:
            optimizer.add_hook(WeightDecay(weight_decay))
    else:
        raise ValueError('Invalid optimizer: {}'.format(optimizer_name))
    if comm is not None:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)

    return optimizer
Пример #34
0
    def __init__(
            self,
            model_parameters,
            # Learning rate at training step s with annealing
            lr_i=1.0 * 1e-4,
            lr_f=1.0 * 1e-5,
            n=10000,
            communicator=None):
        """Momentum-SGD trainer with an annealed learning rate.

        The annealing schedule lives in the parent class; ``self.mu_s(0)``
        is assumed to yield the learning rate at step 0 (defined by the
        parent — confirm there).  When *communicator* is given, a
        ChainerMN multi-node optimizer wrapping ``self.optimizer`` is
        also created.
        """
        super().__init__(lr_i, lr_f, n)

        initial_lr = self.mu_s(0)
        self.optimizer = optimizers.MomentumSGD(initial_lr)
        self.optimizer.setup(model_parameters)

        # Only wrap for multi-node training when a communicator is given.
        self.multi_node_optimizer = (
            chainermn.create_multi_node_optimizer(self.optimizer,
                                                  communicator)
            if communicator else None)
Пример #35
0
    def test_update_with_gpu(self):
        """Non-double-buffered optimizer on GPU with a link added later.

        After growing the model, re-running setup() must broadcast the new
        parameter from rank 0 so that all ranks agree, and gradients must
        be averaged across ranks on update.
        """
        self.setup_gpu()
        self.optimizer = chainermn.create_multi_node_optimizer(
            self.actual_optimizer, self.comm)
        # setup() is expected to return the wrapper itself.
        opt = self.optimizer.setup(self.target)
        assert opt is self.optimizer
        self.optimizer.update()
        self.assertEqual(self.actual_optimizer.t, 0)

        # Grow the model after setup: only rank 0 initialises c.W, so the
        # following setup()/update() must broadcast it to the other ranks.
        with self.target.init_scope():
            c = chainer.links.Linear(4, 4)
            c.to_gpu()
            self.target.c = c
        if self.comm.rank == 0:
            self.target.c.W.data[:] = self.comm.rank + 2
        self.optimizer.setup(self.target)
        self.optimizer.update()
        self.assertEqual(self.actual_optimizer.t, 0)

        # Every rank must now hold an identical copy of c.W.
        send_buf = chainer.cuda.to_cpu(self.optimizer.target.c.W.data)
        recv_buf = self.comm.mpi_comm.allgather(send_buf)
        for i in range(1, self.comm.size):
            chainer.testing.assert_allclose(recv_buf[0], recv_buf[i])

        self.optimizer.target.a.W.grad[:] = self.comm.rank
        self.optimizer.target.b.W.grad[:] = self.comm.rank + 1
        self.optimizer.target.c.W.grad[:] = self.comm.rank + 2
        # With gradients set, update() advances t and fires each update
        # rule exactly once.
        self.optimizer.update()
        self.assertEqual(self.actual_optimizer.t, 1)
        self.optimizer.target.a.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.a.W)
        self.optimizer.target.b.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.b.W)
        self.optimizer.target.c.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.c.W)

        # Gradients are averaged over ranks 0..size-1.
        base = (self.comm.size - 1.0) / 2
        chainer.testing.assert_allclose(self.optimizer.target.a.W.grad,
                                        (base + 0) * np.ones((3, 2)))
        chainer.testing.assert_allclose(self.optimizer.target.b.W.grad,
                                        (base + 1) * np.ones((4, 3)))
        chainer.testing.assert_allclose(self.optimizer.target.c.W.grad,
                                        (base + 2) * np.ones((4, 4)))
    def test_update(self):
        """Double-buffered optimizer with a link added mid-training.

        Re-running setup() after growing the model must broadcast the new
        parameter from rank 0 and restart the one-iteration update delay
        introduced by double buffering.
        """
        self.setup_gpu()
        self.optimizer = chainermn.create_multi_node_optimizer(
            self.actual_optimizer, self.comm, double_buffering=True)
        # setup() is expected to return the wrapper itself.
        opt = self.optimizer.setup(self.target)
        assert opt is self.optimizer
        self.optimizer.update()
        self.assertEqual(self.actual_optimizer.t, 0)

        self.optimizer.target.a.W.grad[:] = self.comm.rank
        self.optimizer.target.b.W.grad[:] = self.comm.rank + 1

        # First update only communicates gradients (double buffering):
        # t stays 0 while communicated_target holds the rank-averaged
        # gradients.
        self.optimizer.update()
        self.optimizer.wait()
        self.assertEqual(self.actual_optimizer.t, 0)
        # Mean of ranks 0..size-1; each parameter adds its own offset.
        base = (self.comm.size - 1.0) / 2
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.a.W.grad,
            (base + 0) * np.ones((3, 2)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.b.W.grad,
            (base + 1) * np.ones((4, 3)))

        self.optimizer.target.a.W.grad[:] = self.comm.rank + 3
        self.optimizer.target.b.W.grad[:] = self.comm.rank + 4
        # Second update consumes the buffered gradients: each update rule
        # fires exactly once and t advances to 1.
        self.optimizer.update()
        self.optimizer.wait()
        self.assertEqual(self.actual_optimizer.t, 1)
        self.optimizer.target.a.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.a.W)
        self.optimizer.target.b.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.b.W)
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.a.W.grad,
            (base + 3) * np.ones((3, 2)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.b.W.grad,
            (base + 4) * np.ones((4, 3)))

        # Grow the model: only rank 0 initialises c.W, so setup() must
        # broadcast it to all other ranks; the time step resets to 0.
        with self.target.init_scope():
            c = chainer.links.Linear(4, 4)
            c.to_gpu()
            self.target.c = c
        if self.comm.rank == 0:
            self.target.c.W.data[:] = self.comm.rank + 2
        self.optimizer.setup(self.target)
        self.optimizer.update()
        self.assertEqual(self.actual_optimizer.t, 0)

        # Every rank must now hold an identical copy of c.W.
        send_buf = chainer.cuda.to_cpu(self.optimizer.target.c.W.data)
        recv_buf = self.comm.mpi_comm.allgather(send_buf)
        for i in range(1, self.comm.size):
            chainer.testing.assert_allclose(recv_buf[0], recv_buf[i])

        self.optimizer.target.a.W.grad[:] = self.comm.rank + 6
        self.optimizer.target.b.W.grad[:] = self.comm.rank + 7
        self.optimizer.target.c.W.grad[:] = self.comm.rank + 8

        # The double-buffering cycle restarts after setup(): this update
        # again only communicates gradients.
        self.optimizer.update()
        self.optimizer.wait()
        self.assertEqual(self.actual_optimizer.t, 0)
        base = (self.comm.size - 1.0) / 2
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.a.W.grad,
            (base + 6) * np.ones((3, 2)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.b.W.grad,
            (base + 7) * np.ones((4, 3)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.c.W.grad,
            (base + 8) * np.ones((4, 4)))

        self.optimizer.target.a.W.grad[:] = self.comm.rank + 9
        self.optimizer.target.b.W.grad[:] = self.comm.rank + 10
        self.optimizer.target.c.W.grad[:] = self.comm.rank + 11
        # Next update consumes the buffer: each rule fires once, t == 1.
        self.optimizer.update()
        self.optimizer.wait()
        self.assertEqual(self.actual_optimizer.t, 1)
        self.optimizer.target.a.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.a.W)
        self.optimizer.target.b.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.b.W)
        self.optimizer.target.c.W.update_rule.update.assert_called_once_with(
            self.optimizer.target.c.W)
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.a.W.grad,
            (base + 9) * np.ones((3, 2)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.b.W.grad,
            (base + 10) * np.ones((4, 3)))
        chainer.testing.assert_allclose(
            self.optimizer.communicated_target.c.W.grad,
            (base + 11) * np.ones((4, 4)))
        # barrier() is required before the destructor of
        # PureNcclCommunicator because communication may not be finished.
        self.comm.mpi_comm.barrier()
Пример #37
0
def main():
    """ChainerMN MNIST example: train an MLP across MPI workers.

    Rank 0 loads the dataset, which is then scattered to all workers;
    a multi-node optimizer all-reduces gradients each update, and
    display/output extensions run on rank 0 only.
    """
    parser = argparse.ArgumentParser(description='ChainerMN example: MNIST')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--communicator', type=str,
                        default='hierarchical', help='Type of communicator')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    # GPU runs use the requested communicator; CPU-only runs are forced
    # onto 'naive', the only communicator that supports CPU execution.
    if args.gpu:
        if args.communicator == 'naive':
            print("Error: 'naive' communicator does not support GPU.\n")
            exit(-1)
        comm = chainermn.create_communicator(args.communicator)
        device = comm.intra_rank
    else:
        if args.communicator != 'naive':
            print('Warning: using naive communicator '
                  'because only naive supports CPU-only execution')
        comm = chainermn.create_communicator('naive')
        device = -1

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = L.Classifier(MLP(args.unit, 10))
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    if comm.rank == 0:
        train, test = chainer.datasets.get_mnist()
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    test_iter = chainer.iterators.SerialIterator(test, args.batchsize,
                                                 repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    # Create a multi node evaluator from a standard Chainer evaluator.
    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
        trainer.extend(extensions.ProgressBar())

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
Пример #38
0
def main():
    """ChainerMN ImageNet example: train a convnet across MPI workers.

    Rank 0 loads the dataset lists, which are scattered to all workers;
    a multi-node optimizer, evaluator and checkpointer coordinate the run.
    """
    # Check if GPU is available
    # (ImageNet example does not support CPU execution)
    if not chainer.cuda.available:
        raise RuntimeError("ImageNet requires GPU support.")

    archs = {
        'alex': alex.Alex,
        'googlenet': googlenet.GoogLeNet,
        'googlenetbn': googlenetbn.GoogLeNetBN,
        'nin': nin.NIN,
        'resnet50': resnet50.ResNet50,
    }

    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--arch', '-a', choices=archs.keys(), default='nin',
                        help='Convnet architecture')
    parser.add_argument('--batchsize', '-B', type=int, default=32,
                        help='Learning minibatch size')
    parser.add_argument('--epoch', '-E', type=int, default=10,
                        help='Number of epochs to train')
    parser.add_argument('--initmodel',
                        help='Initialize the model from given file')
    parser.add_argument('--loaderjob', '-j', type=int,
                        help='Number of parallel data loading processes')
    parser.add_argument('--mean', '-m', default='mean.npy',
                        help='Mean file (computed by compute_mean.py)')
    parser.add_argument('--resume', '-r', default='',
                        help='Initialize the trainer from given file')
    parser.add_argument('--out', '-o', default='result',
                        help='Output directory')
    parser.add_argument('--root', '-R', default='.',
                        help='Root directory path of image files')
    parser.add_argument('--val_batchsize', '-b', type=int, default=250,
                        help='Validation minibatch size')
    parser.add_argument('--test', action='store_true')
    parser.add_argument('--communicator', default='hierarchical')
    parser.set_defaults(test=False)
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        print('Using {} communicator'.format(args.communicator))
        print('Using {} arch'.format(args.arch))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    model = archs[args.arch]()
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    chainer.cuda.get_device_from_id(device).use()  # Make the GPU current
    model.to_gpu()

    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    mean = np.load(args.mean)
    if comm.rank == 0:
        train = PreprocessedDataset(args.train, args.root, mean, model.insize)
        val = PreprocessedDataset(
            args.val, args.root, mean, model.insize, False)
    else:
        train = None
        val = None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    val = chainermn.scatter_dataset(val, comm)

    # We need to change the start method of multiprocessing module if we are
    # using InfiniBand and MultiprocessIterator. This is because processes
    # often crash when calling fork if they are using Infiniband.
    # (c.f., https://www.open-mpi.org/faq/?category=tuning#fork-warning )
    multiprocessing.set_start_method('forkserver')
    train_iter = chainer.iterators.MultiprocessIterator(
        train, args.batchsize, n_processes=args.loaderjob)
    val_iter = chainer.iterators.MultiprocessIterator(
        val, args.val_batchsize, repeat=False, n_processes=args.loaderjob)

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9), comm)
    optimizer.setup(model)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # In --test mode everything fires every 10 iterations for a quick run.
    checkpoint_interval = (10, 'iteration') if args.test else (1, 'epoch')
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    log_interval = (10, 'iteration') if args.test else (1, 'epoch')

    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    # Create a multi node evaluator from an evaluator.
    evaluator = TestModeEvaluator(val_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.dump_graph('main/loss'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'validation/main/loss',
            'main/accuracy', 'validation/main/accuracy', 'lr'
        ]), trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    trainer.run()
    # NOTE(review): everything below looks like a fragment from a
    # *different* example pasted into this function. It references names
    # that are undefined here (``communicator``, ``args.num_gpus``,
    # ``args.units``, ``args.batch_size``, ``args.epochs``, ``args.train``
    # as a directory, ``MLP``, ``_preprocess_mnist``) and would raise
    # NameError/AttributeError if reached. Confirm origin and remove or
    # relocate.
    comm = chainermn.create_communicator(communicator)
    device = comm.intra_rank if args.num_gpus > 0 else -1

    print('==========================================')
    print('Using {} communicator'.format(comm))
    print('Num unit: {}'.format(args.units))
    print('Num Minibatch-size: {}'.format(args.batch_size))
    print('Num epoch: {}'.format(args.epochs))
    print('==========================================')

    model = L.Classifier(MLP(args.units, 10))
    if device >= 0:
        chainer.cuda.get_device(device).use()

    # Create a multi node optimizer from a standard Chainer optimizer.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    optimizer.setup(model)

    train_file = np.load(os.path.join(args.train, 'train.npz'))
    test_file = np.load(os.path.join(args.test, 'test.npz'))

    preprocess_mnist_options = {
        'withlabel': True,
        'ndim': 1,
        'scale': 1.,
        'image_dtype': np.float32,
        'label_dtype': np.int32,
        'rgb_format': False
    }

    train_dataset = _preprocess_mnist(train_file, **preprocess_mnist_options)
Пример #40
0
def main():
    """Distributed seq2seq (WMT En->Fr) training example for ChainerMN.

    Rank 0 reads and filters the parallel corpora (optionally via a pickle
    cache), then the vocabularies are broadcast and the datasets scattered
    to every worker before multi-node training starts.
    """
    parser = argparse.ArgumentParser(description='Chainer example: seq2seq')
    parser.add_argument('--batchsize', '-b', type=int, default=64,
                        help='Number of images in each mini-batch')
    parser.add_argument('--bleu', action='store_true', default=False,
                        help='Report BLEU score')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--cache', '-c', default=None,
                        help='Directory to cache pre-processed dataset')
    parser.add_argument('--resume', '-r', default='',
                        help='Resume the training from snapshot')
    parser.add_argument('--unit', '-u', type=int, default=1024,
                        help='Number of units')
    parser.add_argument('--communicator', default='hierarchical',
                        help='Type of communicator')
    parser.add_argument('--stop', '-s', type=str, default='15e',
                        help='Stop trigger (ex. "500i", "15e")')
    parser.add_argument('--input', '-i', type=str, default='wmt',
                        help='Input directory')
    parser.add_argument('--optimizer', type=str, default='adam()',
                        help='Optimizer and its argument')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    args = parser.parse_args()

    # Prepare ChainerMN communicator: 'hierarchical' requires GPUs, so
    # fall back to the CPU-only 'naive' communicator otherwise.
    if args.gpu:
        comm = chainermn.create_communicator('hierarchical')
        dev = comm.intra_rank
    else:
        comm = chainermn.create_communicator('naive')
        dev = -1

    if comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(comm.size))
        if args.gpu:
            print('Using GPUs')
        print('Using {} communicator'.format(args.communicator))
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('==========================================')

    # Rank 0 prepares all data; the other ranks receive their shares later
    # through bcast_obj / scatter_dataset.
    if comm.rank == 0:
        if args.cache and not os.path.exists(args.cache):
            os.mkdir(args.cache)

        # Read source data
        bt = time.time()
        if args.cache:
            cache_file = os.path.join(args.cache, 'source.pickle')
            source_vocab, source_data = cached_call(cache_file,
                                                    read_source,
                                                    args.input, args.cache)
        else:
            source_vocab, source_data = read_source(args.input, args.cache)
        et = time.time()
        print('RD source done. {:.3f} [s]'.format(et - bt))
        sys.stdout.flush()

        # Read target data
        bt = time.time()
        if args.cache:
            cache_file = os.path.join(args.cache, 'target.pickle')
            target_vocab, target_data = cached_call(cache_file,
                                                    read_target,
                                                    args.input, args.cache)
        else:
            target_vocab, target_data = read_target(args.input, args.cache)
        et = time.time()
        print('RD target done. {:.3f} [s]'.format(et - bt))
        sys.stdout.flush()

        # Drop empty sentences and sentences of 50+ tokens on either side.
        print('Original training data size: %d' % len(source_data))
        train_data = [(s, t)
                      for s, t in six.moves.zip(source_data, target_data)
                      if 0 < len(s) < 50 and 0 < len(t) < 50]
        print('Filtered training data size: %d' % len(train_data))

        # newstest2013 serves as the validation set.
        en_path = os.path.join(args.input, 'dev', 'newstest2013.en')
        source_data = europal.make_dataset(en_path, source_vocab)
        fr_path = os.path.join(args.input, 'dev', 'newstest2013.fr')
        target_data = europal.make_dataset(fr_path, target_vocab)
        assert(len(source_data) == len(target_data))
        test_data = [(s, t) for s, t
                     in six.moves.zip(source_data, target_data)
                     if 0 < len(s) and 0 < len(t)]

        source_ids = {word: index
                      for index, word in enumerate(source_vocab)}
        target_ids = {word: index
                      for index, word in enumerate(target_vocab)}
    else:
        # target_data, source_data = None, None
        train_data, test_data = None, None
        target_ids, source_ids = None, None

    # Print GPU id: one rank at a time, with a barrier between ranks so the
    # output lines do not interleave.
    for i in range(0, comm.size):
        if comm.rank == i:
            print('Rank {} GPU: {}'.format(comm.rank, dev))
        sys.stdout.flush()
        comm.mpi_comm.Barrier()

    # broadcast id- > word dictionary
    source_ids = comm.bcast_obj(source_ids, root=0)
    target_ids = comm.bcast_obj(target_ids, root=0)

    # Inverse mappings (id -> word), used when printing translations.
    target_words = {i: w for w, i in target_ids.items()}
    source_words = {i: w for w, i in source_ids.items()}

    if comm.rank == 0:
        print('target_words : {}'.format(len(target_words)))
        print('source_words : {}'.format(len(source_words)))

    model = Seq2seq(3, len(source_ids), len(target_ids), args.unit)

    if dev >= 0:
        chainer.cuda.get_device_from_id(dev).use()
        model.to_gpu(dev)

    # Determine the stop trigger: "<N>e" means N epochs, "<N>i" means N
    # iterations; anything else is a usage error.
    m = re.match(r'^(\d+)e$', args.stop)
    if m:
        trigger = (int(m.group(1)), 'epoch')
    else:
        m = re.match(r'^(\d+)i$', args.stop)
        if m:
            trigger = (int(m.group(1)), 'iteration')
        else:
            if comm.rank == 0:
                sys.stderr.write('Error: unknown stop trigger: {}'.format(
                    args.stop))
            exit(-1)

    if comm.rank == 0:
        print('Trigger: {}'.format(trigger))

    optimizer = chainermn.create_multi_node_optimizer(
        create_optimizer(args.optimizer), comm)
    optimizer.setup(model)

    # Broadcast dataset
    # Sanity check of train_data
    train_data = chainermn.scatter_dataset(train_data, comm)

    test_data = chainermn.scatter_dataset(test_data, comm)

    train_iter = chainer.iterators.SerialIterator(train_data,
                                                  args.batchsize,
                                                  shuffle=False)
    updater = training.StandardUpdater(
        train_iter, optimizer, converter=convert, device=dev)
    trainer = training.Trainer(updater,
                               trigger,
                               out=args.out)

    trainer.extend(chainermn.create_multi_node_evaluator(
        BleuEvaluator(model, test_data, device=dev, comm=comm),
        comm))

    def translate_one(source, target):
        """Translate a single sentence and print source/result/expected."""
        words = europal.split_sentence(source)
        print('# source : ' + ' '.join(words))
        x = model.xp.array(
            [source_ids.get(w, 1) for w in words], numpy.int32)
        ys = model.translate([x])[0]
        words = [target_words[y] for y in ys]
        print('#  result : ' + ' '.join(words))
        print('#  expect : ' + target)

    # NOTE(review): this extension is currently not registered (the
    # make_extension decorator is commented out), so it never runs.
    # @chainer.training.make_extension(trigger=(200, 'iteration'))
    def translate(trainer):
        translate_one(
            'Who are we ?',
            'Qui sommes-nous?')
        translate_one(
            'And it often costs over a hundred dollars ' +
            'to obtain the required identity card .',
            'Or, il en coûte souvent plus de cent dollars ' +
            'pour obtenir la carte d\'identité requise.')

        source, target = test_data[numpy.random.choice(len(test_data))]
        source = ' '.join([source_words.get(i, '') for i in source])
        target = ' '.join([target_words.get(i, '') for i in target])
        translate_one(source, target)

    # Reporting extensions only on one worker to avoid duplicated output.
    if comm.rank == 0:
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')),
                       trigger=(1, 'epoch'))

        report = extensions.PrintReport(['epoch',
                                         'iteration',
                                         'main/loss',
                                         'main/perp',
                                         'validation/main/bleu',
                                         'elapsed_time'])
        trainer.extend(report, trigger=(1, 'epoch'))

    comm.mpi_comm.Barrier()
    if comm.rank == 0:
        print('start training')
        sys.stdout.flush()

    trainer.run()
Пример #42
0
def main():
    """ChainerMN example: pipelined (model-parallel) MLP on MNIST.

    Processes are paired: within each pair one process runs MLP0 (the first
    pipeline stage) and the other runs MLP1 (the second stage). Data
    parallelism is applied across the pairs.
    """
    parser = argparse.ArgumentParser(
        description='ChainerMN example: pipelined neural network')
    parser.add_argument('--batchsize', '-b', type=int, default=100,
                        help='Number of images in each mini-batch')
    parser.add_argument('--epoch', '-e', type=int, default=20,
                        help='Number of sweeps over the dataset to train')
    parser.add_argument('--gpu', '-g', action='store_true',
                        help='Use GPU')
    parser.add_argument('--out', '-o', default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit', '-u', type=int, default=1000,
                        help='Number of units')
    args = parser.parse_args()

    # Prepare ChainerMN communicator.
    # data_axis identifies the pipeline stage (rank parity: 0 or 1) and
    # model_axis identifies the data-parallel replica. Splitting the world
    # communicator along each axis yields one communicator for data
    # parallelism (data_comm) and one for the two-process pipeline
    # (model_comm).
    if args.gpu:
        comm = chainermn.create_communicator('hierarchical')
        data_axis, model_axis = comm.rank % 2, comm.rank // 2
        data_comm = comm.split(data_axis, comm.rank)
        model_comm = comm.split(model_axis, comm.rank)
        device = comm.intra_rank
    else:
        comm = chainermn.create_communicator('naive')
        data_axis, model_axis = comm.rank % 2, comm.rank // 2
        data_comm = comm.split(data_axis, comm.rank)
        model_comm = comm.split(model_axis, comm.rank)
        device = -1

    # Each pipeline needs exactly two processes, so the world size must be
    # even.
    if model_comm.size != 2:
        raise ValueError(
            'This example can only be executed on the even number '
            'of processes.')

    if comm.rank == 0:
        print('==========================================')
        if args.gpu:
            print('Using GPUs')
        print('Num unit: {}'.format(args.unit))
        print('Num Minibatch-size: {}'.format(args.batchsize))
        print('Num epoch: {}'.format(args.epoch))
        print('==========================================')

    # Stage 0 holds the classifier head's input half; stage 1 holds the
    # output half with 10 classes.
    if data_axis == 0:
        model = L.Classifier(MLP0(model_comm, args.unit))
    elif data_axis == 1:
        model = MLP1(model_comm, args.unit, 10)

    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    # Gradients are averaged only across the data-parallel communicator.
    optimizer = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), data_comm)
    optimizer.setup(model)

    # Original dataset on worker 0 and 1.
    # Datasets of worker 0 and 1 are split and distributed to all workers.
    # Stage-1 processes only consume activations from stage 0, so they get
    # an empty dataset of matching length.
    if model_axis == 0:
        train, test = chainer.datasets.get_mnist()
        if data_axis == 1:
            train = chainermn.datasets.create_empty_dataset(train)
            test = chainermn.datasets.create_empty_dataset(test)
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, data_comm, shuffle=True)
    test = chainermn.scatter_dataset(test, data_comm, shuffle=True)

    train_iter = chainer.iterators.SerialIterator(
        train, args.batchsize, shuffle=False)
    test_iter = chainer.iterators.SerialIterator(
        test, args.batchsize, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=device)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)
    evaluator = extensions.Evaluator(test_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, data_comm)
    trainer.extend(evaluator)

    # Some display and output extensions are necessary only for worker 0.
    if comm.rank == 0:
        trainer.extend(extensions.DumpGraph('main/loss'))
        trainer.extend(extensions.LogReport())
        trainer.extend(extensions.PrintReport(
            ['epoch', 'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
        trainer.extend(extensions.ProgressBar())

    trainer.run()
Пример #43
0
def check_mnist(gpu, display_log=True):
    """Smoke-test distributed MNIST training.

    Trains a small MLP across all processes, asserts that validation
    accuracy exceeds 0.95, and verifies that the multi-node checkpointer
    finalizes (empties) its snapshot directory.
    """
    n_epoch = 5
    batch_size = 100
    hidden_units = 100

    comm = chainermn.create_communicator('naive')
    dev = comm.intra_rank if gpu else -1
    if dev >= 0:
        chainer.cuda.get_device_from_id(dev).use()

    classifier = L.Classifier(MLP(hidden_units, 10))
    if dev >= 0:
        classifier.to_gpu()

    opt = chainermn.create_multi_node_optimizer(
        chainer.optimizers.Adam(), comm)
    opt.setup(classifier)

    # Only rank 0 loads MNIST; scatter_dataset distributes the shards.
    if comm.rank == 0:
        train_set, test_set = chainer.datasets.get_mnist()
    else:
        train_set, test_set = None, None

    train_set = chainermn.scatter_dataset(train_set, comm, shuffle=True)
    test_set = chainermn.scatter_dataset(test_set, comm, shuffle=True)

    train_it = chainer.iterators.SerialIterator(train_set, batch_size)
    test_it = chainer.iterators.SerialIterator(test_set, batch_size,
                                               repeat=False,
                                               shuffle=False)

    updater = training.StandardUpdater(
        train_it,
        opt,
        device=dev
    )

    trainer = training.Trainer(updater, (n_epoch, 'epoch'))

    # Wrap standard Chainer evaluators by MultiNodeEvaluator so results
    # are aggregated over all ranks.
    multi_eval = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(test_it, classifier, device=dev), comm)
    trainer.extend(multi_eval)

    # Checkpointing is exercised purely to confirm it runs without errors.
    path = tempfile.mkdtemp(dir='/tmp', prefix=__name__ + "-tmp-")
    checkpointer = create_multi_node_checkpointer(name=__name__, comm=comm,
                                                  path=path)
    trainer.extend(checkpointer, trigger=(1, 'epoch'))

    # Reporting extensions only on one worker to avoid duplicated output.
    if comm.rank == 0 and display_log:
        trainer.extend(extensions.LogReport(trigger=(1, 'epoch')),
                       trigger=(1, 'epoch'))
        trainer.extend(extensions.PrintReport(['epoch',
                                               'main/loss',
                                               'validation/main/loss',
                                               'main/accuracy',
                                               'validation/main/accuracy',
                                               'elapsed_time'],
                                              out=sys.stderr),
                       trigger=(1, 'epoch'))
    trainer.run()

    accuracy = multi_eval()['validation/main/accuracy']
    assert accuracy > 0.95

    # Check checkpointer successfully finalized snapshot directory
    assert [] == os.listdir(path)
    os.removedirs(path)