    def test_report(self):
        extension = extensions.ParameterStatistics(self.links,
                                                   statistics=self.statistics)
        self.trainer.extend(extension)
        self.trainer.run()

        self.assertEqual(len(self.trainer.observation), self.expect)
def main(gpu_id=-1, bs=32, epoch=20, out='./result', resume=''):
    net = ShallowConv()
    model = L.Classifier(net)
    if gpu_id >= 0:
        chainer.cuda.get_device_from_id(gpu_id).use()  # make the selected GPU current
        model.to_gpu()
    optimizer = chainer.optimizers.Adam()
    optimizer.setup(model)

    train, test = chainer.datasets.get_mnist(ndim=3)
    train_iter = chainer.iterators.SerialIterator(train, bs)
    test_iter = chainer.iterators.SerialIterator(
        test, bs, repeat=False, shuffle=False)

    updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)
    trainer.extend(extensions.ParameterStatistics(model.predictor))
    trainer.extend(extensions.Evaluator(test_iter, model, device=gpu_id))
    trainer.extend(extensions.LogReport(log_name='parameter_statistics'))
    trainer.extend(extensions.PrintReport(
        ['epoch', 'main/loss', 'validation/main/loss',
         'main/accuracy', 'validation/main/accuracy', 'elapsed_time']))
    trainer.extend(extensions.ProgressBar())

    if resume:
        chainer.serializers.load_npz(resume, trainer)

    trainer.run()
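
A minimal way to invoke this entry point (hypothetical call; the original script's CLI wiring is not shown in this listing):

if __name__ == '__main__':
    main()  # or e.g. main(gpu_id=0, bs=64) to train on the first GPU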
    def test_report_key_pattern(self):
        extension = extensions.ParameterStatistics(self.links)
        self.trainer.extend(extension)
        self.trainer.run()

        pattern = r'^(.+/){2,}(data|grad)/.+[^/]$'
        for name in six.iterkeys(self.trainer.observation):
            self.assertTrue(re.match(pattern, name))
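        # The pattern above matches report keys of the form
        # <link path>/<param name>/(data|grad)/<statistic>,
        # e.g. 'predictor/l1/W/data/mean' (an assumed illustration).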
    def test_custom_function(self):
        extension = extensions.ParameterStatistics(self.links,
                                                   statistics=self.statistics)
        self.trainer.extend(extension)
        self.trainer.run()

        for value in six.itervalues(self.trainer.observation):
            self.assertEqual(value, self.expect)
    def test_report_late_register(self):
        extension = extensions.ParameterStatistics(self.links, statistics={})
        for name, function in six.iteritems(self.statistics):
            extension.register_statistics(name, function)
        self.trainer.extend(extension)
        self.trainer.run()

        self.assertEqual(len(self.trainer.observation), self.expect)
    def test_report_key_prefix(self):
        extension = extensions.ParameterStatistics(self.links,
                                                   statistics=self.statistics,
                                                   prefix='prefix')
        self.trainer.extend(extension)
        self.trainer.run()

        for name in six.iterkeys(self.trainer.observation):
            self.assertTrue(name.startswith('prefix'))
    def create_extension(self, skip_statistics=False):
        kwargs = {
            'statistics': self.statistics if not skip_statistics else None,
            'report_params': self.report_params,
            'report_grads': self.report_grads,
            'prefix': self.prefix,
            'skip_nan_params': True  # avoid warnings when grads are nan
        }

        return extensions.ParameterStatistics(self.links, **kwargs)
    def test_skip_params(self):
        extension = extensions.ParameterStatistics(self.links,
                                                   statistics=self.statistics,
                                                   report_params=False)
        self.trainer.extend(extension)
        self.trainer.run()

        for name in six.iterkeys(self.trainer.observation):
            self.assertIn('grad', name)
            self.assertNotIn('data', name)
Example 9
def train_CNN(network_object, batchsize=128, gpu_id=-1, max_epoch=20, train_dataset=None, test_dataset=None, postfix='', base_lr=0.01, lr_decay=None, number=11):
    number = str(number)
    # 1. Dataset
    if train_dataset is None and test_dataset is None:
        train, test = cifar.get_cifar10()
    else:
        train, test = train_dataset, test_dataset
        
    if gpu_id >= 0:
        network_object.to_gpu(gpu_id)
    # 2. Iterator
    train_iter = iterators.MultiprocessIterator(train, batchsize)
    test_iter = iterators.MultiprocessIterator(test, batchsize, False, False)

    # 3. Model
    net = L.Classifier(network_object)

    # 4. Optimizer
    optimizer = optimizers.MomentumSGD()
    optimizer.setup(net)
    optimizer.add_hook(chainer.optimizer.WeightDecay(0.0005))

    # 5. Updater
    updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)

    # 6. Trainer
    trainer = training.Trainer(updater, (max_epoch, 'epoch'), out='{}_crack_{}result'.format(network_object.__class__.__name__, postfix))
    
    # 7. Trainer extensions
    trainer.extend(extensions.LogReport(trigger=(1, 'epoch'), log_name="log_"+number))
    trainer.extend(extensions.snapshot(filename=number+'snapshot_epoch-{.updater.epoch}'), trigger=(5, 'epoch'))
#    trainer.extend(extensions.snapshot(), trigger=(10, 'epoch'))
    trainer.extend(extensions.ParameterStatistics(net.predictor.conv1, {'std': np.std}))
    trainer.extend(extensions.observe_lr())
    trainer.extend(extensions.Evaluator(test_iter, net, device=gpu_id), name='val')
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'main/accuracy', 'val/main/loss', 'val/main/accuracy', 'elapsed_time', 'lr']))
    trainer.extend(extensions.PlotReport(['main/loss', 'val/main/loss'], x_key='epoch', file_name='loss'+number+'.png'))
    trainer.extend(extensions.PlotReport(['main/accuracy', 'val/main/accuracy'], x_key='epoch', file_name='accuracy'+number+'.png'))
    trainer.extend(extensions.PlotReport(['conv1/W/data/std'], x_key='epoch', file_name='std'+number+'.png'))  # key follows the name of the monitored link (conv1)
    if lr_decay is not None:
        trainer.extend(extensions.ExponentialShift('lr', 0.1), trigger=lr_decay)
    trainer.run()
    del trainer

    return net                
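
A hypothetical invocation (MyCifarCNN stands in for any chainer.Chain backbone; it is not part of the original listing). The function moves the network to the GPU, trains it, and returns the Classifier-wrapped model:

trained_net = train_CNN(MyCifarCNN(), batchsize=128, gpu_id=0, max_epoch=20,
                        postfix='run1', lr_decay=(10, 'epoch'), number=11)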
Example 10
def run(aspect, train, word2vec, epoch, frequency, gpu, out, batchsize, lr,
        sparsity_coef, coherent_coef, fix_embedding, dependent, order, resume):
    """
    Train "Rationalizing Neural Predictions" for one specified aspect.

    Please refer to README.md for details.
    """
    memory = Memory(cachedir='.', verbose=1)
    w2v, vocab, dataset, _, _ = \
        memory.cache(prepare_data)(train, word2vec, aspect)
    train_dataset, dev_dataset = chainer.datasets.split_dataset(
        dataset,
        len(dataset) - 500)

    encoder = rationale.models.Encoder(w2v.shape[1],
                                       order,
                                       200,
                                       2,
                                       dropout=0.1)
    generator_cls = (rationale.models.GeneratorDependent
                     if dependent else rationale.models.Generator)
    # Original impl. uses two layers to model bi-directional LSTM
    generator = generator_cls(w2v.shape[1], order, 200, dropout=0.1)
    model = rationale.models.RationalizedRegressor(generator,
                                                   encoder,
                                                   w2v.shape[0],
                                                   w2v.shape[1],
                                                   initialEmb=w2v,
                                                   dropout_emb=0.1,
                                                   fix_embedding=fix_embedding,
                                                   sparsity_coef=sparsity_coef,
                                                   coherent_coef=coherent_coef)

    if gpu >= 0:
        logger.info('Using GPU (%d)' % gpu)
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(gpu).use()
        model.to_gpu()  # Copy the model to the GPU
    elif chainer.backends.intel64.is_ideep_available():
        logger.info('Using CPU with iDeep')
        # iDeep was able to accelerate training on CPU by about 30% on laptop
        model.to_intel64()
        chainer.global_config.use_ideep = 'auto'
    else:
        logger.info('Using CPU without acceleration')

    # The author's implementation uses the mean as the loss, so divide lr by
    # batchsize to get a similar effective step size.
    optimizer = chainer.optimizers.Adam(alpha=lr / batchsize)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(3.0))
    l2_reg = 1e-7
    # The author's implementation applies weight decay as an L2 loss term,
    # which effectively multiplies it by the learning rate, so do the same here.
    optimizer.add_hook(chainer.optimizer.WeightDecay(l2_reg * lr))

    train_iter = chainer.iterators.SerialIterator(train_dataset, batchsize)

    # Set up a trainer
    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       device=gpu,
                                       converter=rationale.training.convert)

    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)

    logger.info("train: {},  dev: {}".format(len(train_dataset),
                                             len(dev_dataset)))
    # Evaluate the model with the development dataset for each epoch
    dev_iter = chainer.iterators.SerialIterator(dev_dataset,
                                                batchsize,
                                                repeat=False,
                                                shuffle=False)

    evaluator = extensions.Evaluator(dev_iter,
                                     model,
                                     device=gpu,
                                     converter=rationale.training.convert)
    trainer.extend(evaluator, trigger=frequency)

    inv_vocab = {v: k for k, v in vocab.items()}

    # make_extension() marks the plain function below as a trainer extension,
    # so trainer.extend() can schedule it (here every 10 iterations, see below).
    @chainer.training.make_extension()
    def monitor_rationale(_):
        batch = dev_dataset[np.random.choice(len(dev_dataset))]
        batch = rationale.training.convert([batch], gpu)
        z = chainer.cuda.to_cpu(model.predict_rationale(batch['xs'])[0])
        source = [
            inv_vocab[int(xi)] for xi in chainer.cuda.to_cpu(batch['xs'][0])
        ]
        result = [t if zi > 0.5 else '_' for t, zi in zip(source, z)]
        print('# source : ' + ' '.join(source))
        print('# result : ' + ' '.join(result))

    trainer.extend(monitor_rationale, trigger=(10, 'iteration'))
    trainer.extend(SaveRestore(filename='trainer.npz'),
                   trigger=MinValueTrigger('validation/main/generator/cost'),
                   priority=96)

    trainer.extend(
        ConditionalRestart(monitor='validation/main/generator/cost',
                           mode='min',
                           patients=2))

    if gpu < 0:
        # ParameterStatistics does not work with GPU as of chainer 2.x
        # https://github.com/chainer/chainer/issues/3027
        trainer.extend(extensions.ParameterStatistics(model,
                                                      trigger=(100,
                                                               'iteration')),
                       priority=99)

    # Write a log of evaluation statistics for each iteration
    trainer.extend(extensions.LogReport(trigger=(1, 'iteration')), priority=98)
    trainer.extend(extensions.PrintReport(
        [
            'epoch', 'main/encoder/mse', 'main/generator/cost',
            'validation/main/encoder/mse', 'validation/main/generator/cost'
        ],
        log_report=extensions.LogReport(trigger=(10, 'iteration'))),
                   trigger=(10, 'iteration'),
                   priority=97)

    if resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(resume, trainer)

    logger.info("Started training")
    trainer.run()

    # Save final model (without trainer)
    chainer.serializers.save_npz(os.path.join(out, 'trained_model.npz'), model)
    with open(os.path.join(out, 'vocab.json'), 'w') as fout:
        json.dump(vocab, fout)
Example 11
EPOCH_NUM = 10
EMBED_SIZE = 100
HIDDEN_SIZE = 200
BATCH_SIZE = 5
OUT_SIZE = 2

model = L.Classifier(
    GRU_SentenceClassifier(vocab_size=len(words),
                           embed_size=EMBED_SIZE,
                           hidden_size=HIDDEN_SIZE,
                           out_size=OUT_SIZE))
optimizer = optimizers.Adam()
optimizer.setup(model)
train, test = chainer.datasets.split_dataset_random(dataset, N - 10)
train_iter = chainer.iterators.SerialIterator(train, BATCH_SIZE)
test_iter = chainer.iterators.SerialIterator(test, BATCH_SIZE, repeat=False)
updater = training.StandardUpdater(train_iter, optimizer, device=-1)
trainer = training.Trainer(updater, (EPOCH_NUM, "epoch"), out="result")
trainer.extend(extensions.Evaluator(test_iter, model, device=-1))
trainer.extend(extensions.LogReport(trigger=(1, "epoch")))
trainer.extend(
    extensions.PrintReport([
        "epoch", "main/loss", "validation/main/loss", "main/accuracy",
        "validation/main/accuracy", "elapsed_time"
    ]))  # epoch, training loss, test loss, training accuracy, test accuracy, elapsed time
trainer.extend(extensions.ProgressBar())
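# With no statistics argument, ParameterStatistics falls back to its built-in
# default statistics (mean, std, min, max, etc.) for the data and gradients of
# every parameter in the model.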
trainer.extend(extensions.ParameterStatistics(model))
trainer.extend(extensions.dump_graph('main/loss'))
trainer.run()
Example 12
def train_model(model, train_iter, valid_iter, epoch=10, out='__result__',
                init_file=None, fix_trained=False, alpha=0.001, init_all=True):
    learner = model

    # Choose the optimization method
    optimizer = O.Adam(alpha=alpha).setup(learner)

    if fix_trained:
        for m in model[:-1]:
            m.disable_update()

    # Set up the updater (updates the parameters)
    updater = T.StandardUpdater(train_iter, optimizer, device=C_.DEVICE)

    # Set up the trainer
    trainer = T.Trainer(updater, stop_trigger=(epoch, 'epoch'), out=out)

    # Add extensions to the trainer
    ## Validation
    trainer.extend(E.Evaluator(valid_iter, learner, device=C_.DEVICE),
                   name='val')

    ## Record statistics of the model parameters
    trainer.extend(E.ParameterStatistics(learner.predictor,
                                         {'std': np.std},
                                         prefix='links'))

    ## Record the learning rate
    trainer.extend(E.observe_lr())

    ## Print training progress to the console
    trainer.extend(
        E.PrintReport(
            ['epoch', 'main/loss', 'val/main/loss', 'elapsed_time', 'lr']))

    ## Logging (also includes the results of the other extensions)
    trainer.extend(E.LogReport(log_name='log.json'))

    ## Plot training progress to image files
    if C_.OS_IS_WIN:
        def ex_pname(link):
            ls = list(link.links())[1:]
            if not ls:
                names = (p.name for p in link.params())
            else:
                names = chain(*map(ex_pname, ls))
            return [f'{link.name}/{n}' for n in names]

        def register(keys, file_name):
            trainer.extend(E.PlotReport(keys,  # x_key='epoch',
                                        file_name=file_name,
                                        marker=None))

        register('lr', file_name='lr.png')
        register(['main/loss', 'val/main/loss'], file_name='loss.png')

        if 'vae' in learner.name:
            register(['main/reconstr', 'val/main/reconstr'],
                     file_name='reconstr.png')

            register(['main/kl_penalty', 'val/main/kl_penalty'],
                     file_name='kl_penalty.png')

            register(['main/mse_vel', 'val/main/mse_vel'],
                     file_name='mse_vel.png')

            register(['main/mse_vor', 'val/main/mse_vor'],
                     file_name='mse_vor.png')

        for link in learner.predictor:
            param_names = ex_pname(link)
            for d in ('data', 'grad'):
                observe_keys_std = [f'links/predictor/{key}/{d}/std'
                                    for key in param_names]
                for l in ('enc', 'dec', 'bne', 'bnd'):
                    file_name = f'std_{d}_{l}_{link.name}.png'
                    f_ = lambda s: l in s# or f'bn{l[0]}' in s
                    keys = list(filter(f_, observe_keys_std))
                    register(keys, file_name=file_name)

    ## Dump the network graph in the dot language
    ## Visualization command: ```dot -Tpng cg.dot -o [output file]```
    trainer.extend(E.dump_graph('main/loss'))

    ## Serialize the trainer object and save it to the output directory
    trainer.extend(
        E.snapshot(filename='snapshot_epoch-{.updater.epoch}.model'))

    ## Progress bar
    if C_.SHOW_PROGRESSBAR:
        trainer.extend(E.ProgressBar())

    if init_file:
        print('loading snapshot:', init_file)
        try:
            if init_all:
                chainer.serializers.load_npz(init_file, trainer)

            else:
                chainer.serializers.load_npz(init_file, learner,
                                             path='updater/model:main/')
        except KeyError:
            raise

    # Custom extensions
    # trainer.extend(plot_loss_ex, trigger=(1, 'epoch'))
    # trainer.extend(lr_drop_ex(alpha), trigger=(1, 'epoch'))
    trainer.extend(pause_ex, trigger=(1, 'iteration'))

    # Start training
    try:
        trainer.run()
    except:
        print('trainer except')
        raise
    finally:
        print('trainer end')
Example 13
def main():
    set_random_seed(0)

    parser = argparse.ArgumentParser(
        description='Document Classification Example')
    parser.add_argument('--batchsize',
                        '-b',
                        type=int,
                        default=64,
                        help='Number of documents in each mini-batch')
    parser.add_argument('--epoch',
                        '-e',
                        type=int,
                        default=30,
                        help='Number of training epochs')
    parser.add_argument('--gpu',
                        '-g',
                        type=int,
                        default=-1,
                        help='GPU ID (negative value indicates CPU)')
    parser.add_argument('--out',
                        '-o',
                        default='result',
                        help='Directory to output the result')
    parser.add_argument('--unit',
                        '-u',
                        type=int,
                        default=200,
                        help='Number of units')
    parser.add_argument('--vocab',
                        '-v',
                        type=int,
                        default=50000,
                        help='Vocabulary size')
    parser.add_argument('--layer',
                        '-l',
                        type=int,
                        default=1,
                        help='Number of LSTM layers')
    parser.add_argument('--dropout',
                        '-d',
                        type=float,
                        default=0.4,
                        help='Dropout rate')
    parser.add_argument('--gradclip',
                        type=float,
                        default=5,
                        help='Gradient clipping threshold')
    parser.add_argument('--train_file',
                        '-train',
                        default='data/train.seg.csv',
                        help='Training data file.')
    parser.add_argument('--test_file',
                        '-test',
                        default='data/test.seg.csv',
                        help='Test data file.')
    parser.add_argument('--model',
                        '-m',
                        help='read model parameters from npz file')
    parser.add_argument(
        '--vcb_file',
        '-vf',
        default=
        '/mnt/gold/users/s18153/prjPyCharm/prjNLP_GPU/data/vocab_train_w_NoReplace.vocab_file',
        help='Vocabulary data file.')
    parser.add_argument('--case',
                        '-c',
                        default='original',
                        help='Select NN Architecture.')
    parser.add_argument('--opt', default='sgd', help='Select Optimizer.')
    parser.add_argument('--dbg_on',
                        action='store_true',
                        help='No save, MiniTrain')
    args = parser.parse_args()
    print(args)
    # train_val = data.DocDataset(args.train_file, vocab_size=args.vocab)

    if os.path.exists(args.vcb_file):  # check whether the vocabulary file exists (load it if already built)
        with open(args.vcb_file, 'rb') as f_vocab_data:
            train_val = pickle.load(f_vocab_data)
            if len(train_val.get_vocab()) != args.vocab:
                warnings.warn('vocab size incorrect (not implemented...)')
    else:
        train_val = data.DocDataset(
            args.train_file,
            vocab_size=args.vocab)  # make vocab from training data
        with open(args.vcb_file, 'wb') as f_vocab_save:
            pickle.dump(train_val, f_vocab_save)

    if args.dbg_on:
        len_train_data = len(train_val)
        N = 1000
        print('N', N)
        rnd_ind = np.random.permutation(range(len_train_data))[:N]
        train_val = train_val[rnd_ind]
        (train, valid) = split_dataset_random(train_val, 800, seed=0)
    else:
        (train, valid) = split_dataset_random(train_val, 4000, seed=0)

    train_iter = iterators.SerialIterator(train, args.batchsize)
    valid_iter = iterators.SerialIterator(valid,
                                          args.batchsize,
                                          repeat=False,
                                          shuffle=False)

    # test = data.DocDataset(args.test_file, train_val.get_vocab())
    # test_iter = iterators.SerialIterator(test, args.batchsize, repeat=False, shuffle=False)

    print('case', args.case)
    if args.case == 'original':
        print('Running with the original architecture')
        result_path = 'result/original'
        model = L.Classifier(
            nets_B.DocClassify(n_vocab=args.vocab + 1,
                               n_units=args.unit,
                               n_layers=args.layer,
                               n_out=4,
                               dropout=args.dropout))
    elif args.case == 'bi':
        print('Running with the bi architecture')
        result_path = 'result/bi'
        model = L.Classifier(
            nets_B.DocClassifyBi(n_vocab=args.vocab + 1,
                                 n_units=args.unit,
                                 n_layers=args.layer,
                                 n_out=4,
                                 dropout=args.dropout))
    elif args.case == 'bi2' or args.case == 'bi_adam_2layer':
        print('Running with the improved bi architecture')
        result_path = 'result/bi2'
        model = L.Classifier(
            nets_B.DocClassifyBi2(n_vocab=args.vocab + 1,
                                  n_units=args.unit,
                                  n_layers=args.layer,
                                  n_out=4,
                                  dropout=args.dropout))
    else:
        warnings.warn('The specified case does not exist; running with the default')
        result_path = 'result/sample_result'
        model = L.Classifier(
            nets_B.DocClassify(n_vocab=args.vocab + 1,
                               n_units=args.unit,
                               n_layers=args.layer,
                               n_out=4,
                               dropout=args.dropout))

    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        # get_device_from_id(args.gpu).use()
        model.to_gpu()

    if args.opt == 'sgd':
        result_path += '_sgd'
        print('SGD')
        optimizer = optimizers.SGD(lr=0.01)
    elif args.opt == 'adam':
        result_path += '_adam'
        print('Adam')
        optimizer = optimizers.Adam()
    elif args.opt == 'bi_adam_2layer':
        result_path += '_adam_2layer'
        print('Adam')
        optimizer = optimizers.Adam()
    else:
        print('No optimizer specified; using SGD')
        optimizer = optimizers.SGD(lr=0.01)
    optimizer.setup(model)
    optimizer.add_hook(chainer.optimizer.GradientClipping(args.gradclip))
    # optimizer.add_hook(chainer.optimizer.Lasso(0.01))

    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       converter=convert_seq,
                                       device=args.gpu)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=result_path)
    trainer.extend(extensions.LogReport())
    if not args.dbg_on:
        trainer.extend(
            extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'))
    trainer.extend(extensions.Evaluator(valid_iter,
                                        model,
                                        converter=convert_seq,
                                        device=args.gpu),
                   name='val')
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'main/loss', 'main/accuracy', 'val/main/loss',
            'val/main/accuracy', 'elapsed_time'
        ]))
    trainer.extend(
        extensions.ParameterStatistics(model.predictor.doc_enc,
                                       {'std': np.std}))
    trainer.extend(
        extensions.PlotReport(['main/loss', 'val/main/loss'],
                              x_key='epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.PlotReport(['main/accuracy', 'val/main/accuracy'],
                              x_key='epoch',
                              file_name='accuracy.png'))
    trainer.extend(extensions.dump_graph('main/loss'))

    if args.model:
        serializers.load_npz(args.model, trainer)

    trainer.run()

Example 14
def main():
    args = arguments()

    #    chainer.config.type_check = False
    chainer.config.autotune = True
    chainer.config.dtype = dtypes[args.dtype]
    chainer.print_runtime_info()
    #print('Chainer version: ', chainer.__version__)
    #print('GPU availability:', chainer.cuda.available)
    #print('cuDNN availability:', chainer.cuda.cudnn_enabled)

    ## dataset preparation
    if args.imgtype == "dcm":
        from dataset_dicom import Dataset
    else:
        from dataset import Dataset
    train_d = Dataset(args.train,
                      args.root,
                      args.from_col,
                      args.to_col,
                      crop=(args.crop_height, args.crop_width),
                      random=args.random_translate,
                      grey=args.grey)
    test_d = Dataset(args.val,
                     args.root,
                     args.from_col,
                     args.to_col,
                     crop=(args.crop_height, args.crop_width),
                     random=args.random_translate,
                     grey=args.grey)

    # setup training/validation data iterators
    train_iter = chainer.iterators.SerialIterator(train_d, args.batch_size)
    test_iter = chainer.iterators.SerialIterator(test_d,
                                                 args.nvis,
                                                 shuffle=False)
    test_iter_gt = chainer.iterators.SerialIterator(
        train_d, args.nvis,
        shuffle=False)  ## same as training data; used for validation

    args.ch = len(train_d[0][0])
    args.out_ch = len(train_d[0][1])
    print("Input channels {}, Output channels {}".format(args.ch, args.out_ch))

    ## Set up models
    gen = net.Generator(args)
    dis = net.Discriminator(args)

    ## load learnt models
    optimiser_files = []
    if args.model_gen:
        serializers.load_npz(args.model_gen, gen)
        print('model loaded: {}'.format(args.model_gen))
        optimiser_files.append(args.model_gen.replace('gen_', 'opt_gen_'))
    if args.model_dis:
        serializers.load_npz(args.model_dis, dis)
        print('model loaded: {}'.format(args.model_dis))
        optimiser_files.append(args.model_dis.replace('dis_', 'opt_dis_'))

    ## send models to GPU
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        gen.to_gpu()
        dis.to_gpu()

    # Setup optimisers
    def make_optimizer(model, lr, opttype='Adam'):
        #        eps = 1e-5 if args.dtype==np.float16 else 1e-8
        optimizer = optim[opttype](lr)
        if args.weight_decay > 0:
            if opttype in ['Adam', 'AdaBound', 'Eve']:
                optimizer.weight_decay_rate = args.weight_decay
            else:
                if args.weight_decay_norm == 'l2':
                    optimizer.add_hook(
                        chainer.optimizer.WeightDecay(args.weight_decay))
                else:
                    optimizer.add_hook(
                        chainer.optimizer_hooks.Lasso(args.weight_decay))
        optimizer.setup(model)
        return optimizer

    opt_gen = make_optimizer(gen, args.learning_rate, args.optimizer)
    opt_dis = make_optimizer(dis, args.learning_rate, args.optimizer)
    optimizers = {'opt_g': opt_gen, 'opt_d': opt_dis}

    ## resume optimisers from file
    if args.load_optimizer:
        for (m, e) in zip(optimiser_files, optimizers):
            if m:
                try:
                    serializers.load_npz(m, optimizers[e])
                    print('optimiser loaded: {}'.format(m))
                except:
                    print("couldn't load {}".format(m))
                    pass

    # Set up trainer
    updater = pixupdater(
        models=(gen, dis),
        iterator={
            'main': train_iter,
            'test': test_iter,
            'test_gt': test_iter_gt
        },
        optimizer={
            'gen': opt_gen,
            'dis': opt_dis
        },
        #        converter=convert.ConcatWithAsyncTransfer(),
        params={'args': args},
        device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    ## save learnt results at an interval
    if args.snapinterval < 0:
        args.snapinterval = args.epoch
    snapshot_interval = (args.snapinterval, 'epoch')
    display_interval = (args.display_interval, 'iteration')

    trainer.extend(extensions.snapshot_object(gen, 'gen_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)
    trainer.extend(extensions.snapshot_object(opt_gen,
                                              'opt_gen_{.updater.epoch}.npz'),
                   trigger=snapshot_interval)
    if args.lambda_dis > 0:
        trainer.extend(extensions.snapshot_object(dis,
                                                  'dis_{.updater.epoch}.npz'),
                       trigger=snapshot_interval)
        trainer.extend(
            extensions.dump_graph('dis/loss_real', out_name='dis.dot'))
        trainer.extend(extensions.snapshot_object(
            opt_dis, 'opt_dis_{.updater.epoch}.npz'),
                       trigger=snapshot_interval)

    if args.lambda_rec_l1 > 0:
        trainer.extend(extensions.dump_graph('gen/loss_L1',
                                             out_name='gen.dot'))
    elif args.lambda_rec_l2 > 0:
        trainer.extend(extensions.dump_graph('gen/loss_L2',
                                             out_name='gen.dot'))

    ## log outputs
    log_keys = ['epoch', 'iteration', 'lr']
    log_keys_gen = [
        'gen/loss_L1', 'gen/loss_L2', 'gen/loss_dis', 'myval/loss_L2',
        'gen/loss_tv'
    ]
    log_keys_dis = ['dis/loss_real', 'dis/loss_fake', 'dis/loss_mispair']
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(extensions.PrintReport(log_keys + log_keys_gen +
                                          log_keys_dis),
                   trigger=display_interval)
    if extensions.PlotReport.available():
        trainer.extend(
            extensions.PlotReport(log_keys_gen,
                                  'iteration',
                                  trigger=display_interval,
                                  file_name='loss_gen.png'))
        trainer.extend(
            extensions.PlotReport(log_keys_dis,
                                  'iteration',
                                  trigger=display_interval,
                                  file_name='loss_dis.png'))
    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.extend(extensions.ParameterStatistics(gen))
    # learning rate scheduling
    if args.optimizer in ['SGD', 'Momentum', 'AdaGrad', 'RMSprop']:
        trainer.extend(extensions.observe_lr(optimizer_name='gen'),
                       trigger=display_interval)
        trainer.extend(extensions.ExponentialShift('lr',
                                                   0.33,
                                                   optimizer=opt_gen),
                       trigger=(args.epoch / 5, 'epoch'))
        trainer.extend(extensions.ExponentialShift('lr',
                                                   0.33,
                                                   optimizer=opt_dis),
                       trigger=(args.epoch / 5, 'epoch'))
    elif args.optimizer in ['Adam', 'AdaBound', 'Eve']:
        trainer.extend(extensions.observe_lr(optimizer_name='gen'),
                       trigger=display_interval)
        trainer.extend(extensions.ExponentialShift("alpha",
                                                   0.33,
                                                   optimizer=opt_gen),
                       trigger=(args.epoch / 5, 'epoch'))
        trainer.extend(extensions.ExponentialShift("alpha",
                                                   0.33,
                                                   optimizer=opt_dis),
                       trigger=(args.epoch / 5, 'epoch'))

    # evaluation
    vis_folder = os.path.join(args.out, "vis")
    os.makedirs(vis_folder, exist_ok=True)
    if not args.vis_freq:
        args.vis_freq = len(train_d) // 2
    trainer.extend(VisEvaluator({
        "test": test_iter,
        "train": test_iter_gt
    }, {"gen": gen},
                                params={'vis_out': vis_folder},
                                device=args.gpu),
                   trigger=(args.vis_freq, 'iteration'))

    # ChainerUI: removed until ChainerUI updates to be compatible with Chainer 6.0
    #    trainer.extend(CommandsExtension())

    # Run the training
    print("trainer start")
    trainer.run()
Example 15
def main():
    # Check if GPU is available
    if not chainer.cuda.available:
        raise RuntimeError("ImageNet requires GPU support.")

    parser = argparse.ArgumentParser(
        description='Training ResNet50 on ImageNet')
    # Data
    parser.add_argument('train', help='Path to training image-label list file')
    parser.add_argument('val', help='Path to validation image-label list file')
    parser.add_argument('--train_root', default='.')
    parser.add_argument('--val_root', default='.')
    parser.add_argument('--mean', default='mean.npy')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument('--iterator', default='thread')
    # Training Settings
    parser.add_argument('--arch_file', type=str, default='models/resnet50.py')
    parser.add_argument('--arch_name', type=str, default='ResNet50')
    parser.add_argument('--initmodel')
    parser.add_argument('--resume', default='')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--val_batchsize', type=int, default=16)
    parser.add_argument('--acc_iters', type=int, default=1)
    parser.add_argument('--epoch', '-E', type=int, default=36)
    parser.add_argument('--normalize_weight',
                        action='store_true',
                        default=True)  # NOQA
    parser.add_argument('--nw_skip_scale_comp',
                        action='store_true',
                        default=False)  # NOQA
    # Hyper-parameters
    parser.add_argument('--lr', type=float, default=8.18e-3)
    parser.add_argument('--lr_plan', default='polynomial')
    parser.add_argument('--epoch_lr_decay_start', type=float, default=1)
    parser.add_argument('--polynomial_decay_p', type=float, default=11)
    parser.add_argument('--polynomial_epoch', type=float, default=53)
    parser.add_argument('--momentum', type=float, default=0.997)
    parser.add_argument('--adjust_momentum', action='store_true',
                        default=True)  # NOQA
    parser.add_argument('--mixup_alpha', type=float, default=0.4)
    parser.add_argument('--running_mixup', action='store_true', default=True)
    parser.add_argument('--re_rate', type=float, default=0.5)
    parser.add_argument('--re_area_rl', type=float, default=0.02)
    parser.add_argument('--re_area_rh', type=float, default=0.25)
    parser.add_argument('--re_aspect_rl', type=float, default=0.3)
    parser.add_argument('--cov_ema_decay', type=float, default=1.0)
    parser.add_argument('--damping', type=float, default=2.5e-4)
    parser.add_argument('--use_tensor_core',
                        action='store_true',
                        default=False)  # NOQA
    parser.add_argument('--communicate_after_forward',
                        action='store_true',
                        default=False)  # NOQA
    # Other
    parser.add_argument('--test', action='store_true', default=False)
    parser.add_argument('--stats', action='store_true', default=False)
    parser.add_argument('--config', type=str, default=None)
    parser.add_argument('--config_out', default='config.json')
    parser.add_argument('--out', '-o', default='result')

    args = parser.parse_args()
    dict_args = vars(args)

    # ======== Load config file ========
    if args.config is not None:
        with open(args.config) as f:
            _config = json.load(f)
        dict_args.update(_config)

    # ======== Create communicator ========
    comm = chainerkfac.create_communicator('pure_nccl')
    device = comm.intra_rank

    # ======== Create model ========
    kwargs = {
        'mixup_alpha': args.mixup_alpha,
        'running_mixup': args.running_mixup,
        're_area_rl': args.re_area_rl,
        're_area_rh': args.re_area_rh,
        're_aspect_rl': args.re_aspect_rl,
        're_rate': args.re_rate,
    }
    arch = get_arch(args.arch_file, args.arch_name)
    model = arch(**kwargs)
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    # ======== Copy model to GPU ========
    try:
        chainer.cuda.get_device_from_id(device).use()  # Make the GPU current
        model.to_gpu()
    except chainer.cuda.cupy.cuda.runtime.CUDARuntimeError as e:
        print('[ERROR] Host: {}, GPU ID: {}'.format(socket.gethostname(),
                                                    device),
              file=sys.stderr)
        raise e

    # ======== Create dataset ========
    # Split and distribute the dataset. Only worker 0 loads the whole dataset.
    # Datasets of worker 0 are evenly split and distributed to all workers.
    mean = np.load(args.mean)
    if comm.rank == 0:
        train = datasets.read_pairs(args.train)
        val = datasets.read_pairs(args.val)
    else:
        train = None
        val = None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    val = chainermn.scatter_dataset(val, comm)
    train = datasets.CroppingImageDatasetIO(train, args.train_root, mean,
                                            model.insize, model.insize)
    val = datasets.CroppingImageDatasetIO(val, args.val_root, mean,
                                          model.insize, model.insize, False)

    # ======== Create iterator ========
    if args.iterator == 'process':
        # We need to change the start method of multiprocessing module if we
        # are using InfiniBand and MultiprocessIterator. This is because
        # processes often crash when calling fork if they are using Infiniband.
        # (c.f., https://www.open-mpi.org/faq/?category=tuning#fork-warning )
        multiprocessing.set_start_method('forkserver')
        train_iter = chainer.iterators.MultiprocessIterator(
            train, args.batchsize, n_processes=args.loaderjob)
        val_iter = chainer.iterators.MultiprocessIterator(
            val,
            args.val_batchsize,
            n_processes=args.loaderjob,
            repeat=False,
            shuffle=False)
    elif args.iterator == 'thread':
        train_iter = chainer.iterators.MultithreadIterator(
            train, args.batchsize, n_threads=args.loaderjob)
        val_iter = chainer.iterators.MultithreadIterator(
            val,
            args.val_batchsize,
            n_threads=args.loaderjob,
            repeat=False,
            shuffle=False)
    else:
        train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
        val_iter = chainer.iterators.SerialIterator(val,
                                                    args.val_batchsize,
                                                    repeat=False,
                                                    shuffle=False)

    # ======== Create optimizer ========
    optimizer = chainerkfac.optimizers.DistributedKFAC(
        comm,
        lr=args.lr,
        momentum=args.momentum,
        cov_ema_decay=args.cov_ema_decay,
        damping=args.damping,
        acc_iters=args.acc_iters,
        adjust_momentum=args.adjust_momentum,
        communicate_after_forward=args.communicate_after_forward,
    )
    optimizer.setup(model)
    optimizer.use_fp32_update()

    if args.normalize_weight:
        link = getattr(optimizer, 'target')
        for param in link.params():
            if getattr(param, 'normalize_weight', False):
                param.update_rule.add_hook(
                    NormalizeWeightUR(skip_scale_comp=args.nw_skip_scale_comp))

    if comm.rank == 0:
        print('indices: {}'.format(optimizer.indices))

    # ======== Create updater ========
    updater = training.StandardUpdater(train_iter, optimizer, device=device)

    # ======== Create trainer ========
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.out)

    # ======== Extend trainer ========
    val_interval = (10, 'iteration') if args.test else (1, 'epoch')
    log_interval = (10, 'iteration') if args.test else (1, 'epoch')
    # Create a multi node evaluator from an evaluator.
    evaluator = TestModeEvaluator(val_iter, model, device=device)
    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=val_interval)
    # Reduce the learning rate
    if args.lr_plan == 'polynomial':
        epoch_end = max(args.epoch, args.polynomial_epoch)
        trainer.extend(LrPolynomialDecay(args.lr,
                                         args.epoch_lr_decay_start,
                                         epoch_end,
                                         p=args.polynomial_decay_p),
                       trigger=(args.acc_iters, 'iteration'))

    # Some display and output extensions are necessary only for one worker.
    # (Otherwise, there would just be repeated outputs.)
    if comm.rank == 0:
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'elapsed_time', 'main/loss',
            'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy', 'lr'
        ]),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))
        if args.stats:
            trainer.extend(extensions.ParameterStatistics(model))

    if args.resume:
        chainer.serializers.load_npz(args.resume, trainer)

    if comm.rank == 0:
        hyperparams = optimizer.hyperparam.get_dict()
        for k, v in hyperparams.items():
            print('{}: {}'.format(k, v))

    # ======== Save configration ========
    os.makedirs(args.out, exist_ok=True)
    my_config = {}
    my_config['args'] = vars(args)
    my_config['hyperparams'] = optimizer.hyperparam.get_dict()

    with open(os.path.join(args.out, args.config_out), 'w') as f:
        r = json.dumps(my_config)
        f.write(r)

    # Copy this file to args.out
    shutil.copy(os.path.realpath(__file__), args.out)

    chainer.cuda.set_max_workspace_size(512 * 1024 * 1024)
    config.autotune = True
    config.cudnn_fast_batch_normalization = True

    trainer.run()
Example 16
def train(args):
    with open(args.config) as f:
        config = yaml.safe_load(f)

    print('==========================================')

    # Set workspace size
    if 'max_workspace_size' in config:
        chainer.cuda.set_max_workspace_size(config['max_workspace_size'])

    # Output version info
    print('chainer version: {}'.format(chainer.__version__))
    print('cuda: {}, cudnn: {}, nccl: {}'.format(chainer.cuda.available,
                                                 chainer.cuda.cudnn_enabled,
                                                 HAVE_NCCL))

    # Create result_dir
    if args.result_dir is not None:
        config['result_dir'] = args.result_dir
    else:
        config['result_dir'] = create_result_dir_from_config_path(args.config)
    log_fn = save_config_get_log_fn(config['result_dir'], args.config)
    print('result_dir:', config['result_dir'])

    # Instantiate model
    model = get_model_from_config(config)
    print('model:', model.__class__.__name__)

    # Initialize optimizer
    optimizer = get_optimizer_from_config(model, config)
    print('optimizer:', optimizer.__class__.__name__)

    # Setting up datasets
    train_dataset, valid_dataset = get_dataset_from_config(config)
    print('train_dataset: {}'.format(len(train_dataset)),
          train_dataset.__class__.__name__)
    print('valid_dataset: {}'.format(len(valid_dataset)),
          valid_dataset.__class__.__name__)

    # Prepare devices
    devices = {'main': args.gpus[0]}
    for gid in args.gpus[1:]:
        devices['gpu{}'.format(gid)] = gid

    # Create iterators
    train_iter, valid_iter = create_iterators(
        train_dataset, config['dataset']['train']['batchsize'], valid_dataset,
        config['dataset']['valid']['batchsize'], devices)
    print('train_iter:', train_iter.__class__.__name__)
    print('valid_iter:', valid_iter.__class__.__name__)

    # Create updater
    updater_creator = get_updater_creator_from_config(config)
    updater = updater_creator(train_iter, optimizer, devices)
    print('updater:', updater.__class__.__name__)

    # Create trainer
    trainer = training.Trainer(updater,
                               config['stop_trigger'],
                               out=config['result_dir'])
    print('Trainer stops:', config['stop_trigger'])

    # Trainer extensions
    for ext in config['trainer_extension']:
        ext, values = ext.popitem()
        if ext == 'LogReport':
            trigger = values['trigger']
            trainer.extend(
                extensions.LogReport(trigger=trigger, log_name=log_fn))
        elif ext == 'observe_lr':
            trainer.extend(extensions.observe_lr(), trigger=values['trigger'])
        elif ext == 'dump_graph':
            trainer.extend(extensions.dump_graph(**values))
        elif ext == 'Evaluator':
            evaluator_creator = get_evaluator_creator_from_config(values)
            evaluator = evaluator_creator(valid_iter, model, devices)
            trainer.extend(evaluator,
                           trigger=values['trigger'],
                           name=values['prefix'])
        elif ext == 'PlotReport':
            trainer.extend(extensions.PlotReport(**values))
        elif ext == 'PrintReport':
            trigger = values.pop('trigger')
            trainer.extend(extensions.PrintReport(**values), trigger=trigger)
        elif ext == 'ProgressBar':
            upd_int = values['update_interval']
            trigger = values['trigger']
            trainer.extend(extensions.ProgressBar(update_interval=upd_int),
                           trigger=trigger)
        elif ext == 'snapshot':
            filename = values['filename']
            trigger = values['trigger']
            trainer.extend(extensions.snapshot(filename=filename),
                           trigger=trigger)
        elif ext == 'ParameterStatistics':
            links = []
            for link_name in values.pop('links'):
                lns = [ln.strip() for ln in link_name.split('.') if ln.strip()]
                target = model.predictor
                for ln in lns:
                    target = getattr(target, ln)
                links.append(target)
            trainer.extend(extensions.ParameterStatistics(links, **values))
        elif ext == 'custom':
            custom_extension = get_custum_extension_from_config(values)
            trainer.extend(custom_extension, trigger=values['trigger'])
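    # (A hypothetical YAML entry handled by the ParameterStatistics branch above,
    #  assuming links are given as dotted paths under model.predictor:
    #    trainer_extension:
    #      - ParameterStatistics:
    #          links: ['conv1', 'block1.conv2']
    #          prefix: 'stats'
    #  each dotted name is resolved attribute by attribute, and the extension is
    #  then created with the remaining keyword arguments.)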

    # LR decay
    if 'lr_drop_ratio' in config['optimizer'] \
            and 'lr_drop_triggers' in config['optimizer']:
        ratio = config['optimizer']['lr_drop_ratio']
        points = config['optimizer']['lr_drop_triggers']['points']
        unit = config['optimizer']['lr_drop_triggers']['unit']
        drop_trigger = triggers.ManualScheduleTrigger(points, unit)

        def lr_drop(trainer):
            trainer.updater.get_optimizer('main').lr *= ratio

        trainer.extend(lr_drop, trigger=drop_trigger)

    # Resume
    if args.resume is not None:
        fn = '{}.bak'.format(args.resume)
        shutil.copy(args.resume, fn)
        serializers.load_npz(args.resume, trainer)
        print('Resumed from:', args.resume)

    print('==========================================')

    trainer.run()
    return 0
Example 17
def run(dataset, word2vec, epoch, frequency, gpu, out, model, batchsize, lr,
        fix_embedding, resume):
    """
    Train multi-domain user review classification using Blitzer et al.'s dataset
    (https://www.cs.jhu.edu/~mdredze/datasets/sentiment/)

    Please refer to README.md for details.
    """
    memory = Memory(cachedir=out, verbose=1)
    w2v, vocab, train_dataset, dev_dataset, _, label_dict, domain_dict = \
        memory.cache(prepare_blitzer_data)(dataset, word2vec)
    if model == 'rnn':
        model = multidomain_sentiment.models.create_rnn_predictor(
            len(domain_dict),
            w2v.shape[0],
            w2v.shape[1],
            300,
            len(label_dict),
            2,
            300,
            dropout_rnn=0.1,
            initialEmb=w2v,
            dropout_emb=0.1,
            fix_embedding=fix_embedding)
    elif model == 'cnn':
        model = multidomain_sentiment.models.create_cnn_predictor(
            len(domain_dict),
            w2v.shape[0],
            w2v.shape[1],
            300,
            len(label_dict),
            300,
            dropout_fc=0.1,
            initialEmb=w2v,
            dropout_emb=0.1,
            fix_embedding=fix_embedding)
    else:
        assert not "should not get here"

    classifier = multidomain_sentiment.models.MultiDomainClassifier(
        model, domain_dict=domain_dict)

    if gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(gpu).use()
        classifier.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam(alpha=lr)
    optimizer.setup(classifier)

    train_iter = chainer.iterators.SerialIterator(train_dataset, batchsize)

    # Set up a trainer
    updater = training.StandardUpdater(
        train_iter,
        optimizer,
        device=gpu,
        converter=multidomain_sentiment.training.convert)

    if dev_dataset is not None:
        stop_trigger = EarlyStoppingTrigger(monitor='validation/main/loss',
                                            max_trigger=(epoch, 'epoch'))
        trainer = training.Trainer(updater, stop_trigger, out=out)

        logger.info("train: {},  dev: {}".format(len(train_dataset),
                                                 len(dev_dataset)))
        # Evaluate the model with the development dataset for each epoch
        dev_iter = chainer.iterators.SerialIterator(dev_dataset,
                                                    batchsize,
                                                    repeat=False,
                                                    shuffle=False)

        evaluator = extensions.Evaluator(
            dev_iter,
            classifier,
            device=gpu,
            converter=multidomain_sentiment.training.convert)
        trainer.extend(evaluator, trigger=frequency)
        # This works together with EarlyStoppingTrigger to provide more reliable
        # early stopping
        trainer.extend(SaveRestore(),
                       trigger=chainer.training.triggers.MinValueTrigger(
                           'validation/main/loss'))
    else:
        trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)
        logger.info("train: {}".format(len(train_dataset)))
        # SaveRestore handles snapshots only when dev_dataset is available,
        # so take periodic snapshots here instead
        trainer.extend(extensions.snapshot(), trigger=frequency)

    logger.info("With labels: %s" % json.dumps(label_dict))
    # Take a snapshot for each specified epoch
    if gpu < 0:
        # ParameterStatistics does not work with GPU as of chainer 2.x
        # https://github.com/chainer/chainer/issues/3027
        trainer.extend(extensions.ParameterStatistics(model,
                                                      trigger=(100,
                                                               'iteration')),
                       priority=99)

    # Write a log of evaluation statistics for each iteration
    trainer.extend(extensions.LogReport(trigger=(1, 'iteration')), priority=98)
    trainer.extend(extensions.PrintReport([
        'epoch', 'main/loss', 'validation/main/loss', 'main/accuracy',
        'validation/main/accuracy'
    ]),
                   trigger=frequency,
                   priority=97)

    if resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(resume, trainer)

    logger.info("Started training")
    trainer.run()

    # Save final model (without trainer)
    chainer.serializers.save_npz(os.path.join(out, 'trained_model'), model)
    with open(os.path.join(out, 'vocab.json'), 'w') as fout:
        json.dump(vocab, fout)
Example 18
    trainer.extend(extensions.dump_graph('main/loss'))
    trainer.extend(extensions.Evaluator(valid_iter,
                                        model,
                                        converter=util.converter,
                                        device=device),
                   name='val')
    trainer.extend(
        extensions.PrintReport([
            'epoch', 'iteration', 'main/loss', 'val/main/loss', 'elapsed_time'
        ]))
    trainer.extend(
        extensions.PlotReport(['main/loss', 'val/main/loss'],
                              x_key='epoch',
                              file_name='loss.png'))
    trainer.extend(
        extensions.ParameterStatistics(model.predictor.W, {'mean': xp.mean},
                                       report_grads=True))

    trainer.run()
    gen_model = util.Generator(predictor=predictor, device=device, max_size=30)
    with chainer.using_config('train', False), chainer.using_config(
            'enable_backprop', False):
        ys_list = gen_model(test)
        for ys in ys_list:
            for y in ys:
                y = int(y)
                if y == vocab['<eos>']:
                    print('\n')
                    break
                print(rvocab[y], end='')

    if device >= 0:
Example 19
def get_trainer(args):
    with open(args.config) as f:
        config = yaml.safe_load(f)

    # Set workspace size
    if 'max_workspace_size' in config:
        chainer.cuda.set_max_workspace_size(config['max_workspace_size'])

    # Show the setup information
    print('==========================================')
    print('Chainer version: {}'.format(chainer.__version__))
    print('CuPy version: {}'.format(chainer.cuda.cupy.__version__))
    print('cuda: {}, cudnn: {}'.format(
        chainer.cuda.available,
        chainer.cuda.cudnn_enabled,
    ))

    # Prepare devices
    print('Devices:')
    devices = {'main': args.gpus[0]}
    print('\tmain:', args.gpus[0])
    for gid in args.gpus[1:]:
        devices['gpu{}'.format(gid)] = gid
        print('\tgpu{}'.format(gid), gid)

    # Create result_dir
    if args.result_dir is not None:
        config['result_dir'] = args.result_dir
        model_fn = config['model']['module'].split('.')[-1]
        sys.path.insert(0, args.result_dir)
        config['model']['module'] = model_fn
    else:
        config['result_dir'] = create_result_dir_from_config_path(args.config)
    log_fn = save_config_get_log_fn(config['result_dir'], args.config)
    print('result_dir:', config['result_dir'])

    # Instantiate model
    model = get_model_from_config(config)
    print('model:', model.__class__.__name__)

    # Initialize optimizer
    optimizer = get_optimizer_from_config(model, config)
    print('optimizer:', optimizer.__class__.__name__)

    # Setting up datasets
    train_dataset, valid_dataset = get_dataset_from_config(config)
    print('train_dataset: {}'.format(len(train_dataset)),
          train_dataset.__class__.__name__)
    print('valid_dataset: {}'.format(len(valid_dataset)),
          valid_dataset.__class__.__name__)

    # Create iterators
    train_iter, valid_iter = create_iterators(train_dataset, valid_dataset,
                                              config)
    print('train_iter:', train_iter.__class__.__name__)
    print('valid_iter:', valid_iter.__class__.__name__)

    # Create updater and trainer
    if 'updater_creator' in config:
        updater_creator = get_updater_creator_from_config(config)
        updater = updater_creator(train_iter, optimizer, devices)
    else:
        updater = create_updater(train_iter, optimizer, devices)
    print('updater:', updater.__class__.__name__)

    # Create Trainer
    trainer = training.Trainer(updater,
                               config['stop_trigger'],
                               out=config['result_dir'])
    print('Trainer stops:', config['stop_trigger'])

    # Trainer extensions
    for ext in config['trainer_extension']:
        ext, values = ext.popitem()
        if ext == 'LogReport':
            trigger = values['trigger']
            trainer.extend(
                extensions.LogReport(trigger=trigger, log_name=log_fn))
        elif ext == 'observe_lr':
            trainer.extend(extensions.observe_lr(), trigger=values['trigger'])
        elif ext == 'dump_graph':
            trainer.extend(extensions.dump_graph(**values))
        elif ext == 'Evaluator':
            assert 'module' in values
            mod = import_module(values['module'])
            evaluator = getattr(mod, values['name'])
            if evaluator is extensions.Evaluator:
                evaluator = evaluator(valid_iter, model, device=args.gpus[0])
            else:
                evaluator = evaluator(valid_iter, model.predictor)
            trainer.extend(evaluator,
                           trigger=values['trigger'],
                           name=values['prefix'])
        elif ext == 'PlotReport':
            trainer.extend(extensions.PlotReport(**values))
        elif ext == 'PrintReport':
            trigger = values.pop('trigger')
            trainer.extend(extensions.PrintReport(**values), trigger=trigger)
        elif ext == 'ProgressBar':
            upd_int = values['update_interval']
            trigger = values['trigger']
            trainer.extend(extensions.ProgressBar(update_interval=upd_int),
                           trigger=trigger)
        elif ext == 'snapshot':
            filename = values['filename']
            trigger = values['trigger']
            trainer.extend(extensions.snapshot(filename=filename),
                           trigger=trigger)
        elif ext == 'ParameterStatistics':
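            # Resolve each dotted link path from the config (e.g. a value like
            # 'block1.conv1') to the corresponding Link under model.predictor
            # before passing the collected links to ParameterStatistics.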
            links = []
            for link_name in values.pop('links'):
                lns = [ln.strip() for ln in link_name.split('.') if ln.strip()]
                target = model.predictor
                for ln in lns:
                    target = getattr(target, ln)
                links.append(target)
            trainer.extend(extensions.ParameterStatistics(links, **values))
        elif ext == 'custom':
            custom_extension = get_custum_extension_from_config(values)
            trainer.extend(custom_extension)

    # LR decay
    if 'lr_drop_ratio' in config['optimizer'] \
            and 'lr_drop_triggers' in config['optimizer']:
        ratio = config['optimizer']['lr_drop_ratio']
        points = config['optimizer']['lr_drop_triggers']['points']
        unit = config['optimizer']['lr_drop_triggers']['unit']
        drop_trigger = triggers.ManualScheduleTrigger(points, unit)

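        # Any callable that accepts the trainer can be registered as an
        # extension; this one scales the learning rate by the configured
        # ratio whenever the manual schedule trigger fires.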
        def lr_drop(trainer):
            trainer.updater.get_optimizer('main').lr *= ratio

        trainer.extend(lr_drop, trigger=drop_trigger)

    if 'lr_drop_poly_power' in config['optimizer']:
        power = config['optimizer']['lr_drop_poly_power']
        stop_trigger = config['stop_trigger']
        batchsize = train_iter.batch_size
        len_dataset = len(train_dataset)
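        # PolynomialShift is a project-specific extension (defined elsewhere
        # in this repository); it appears to decay the optimizer's 'lr'
        # attribute every iteration following a polynomial schedule derived
        # from the stop trigger, batch size and dataset length.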
        trainer.extend(PolynomialShift('lr', power, stop_trigger, batchsize,
                                       len_dataset),
                       trigger=(1, 'iteration'))

    # Resume
    if args.resume is not None:
        serializers.load_npz(args.resume, trainer)
        print('Resumed from:', args.resume)

    print('==========================================')

    return trainer
Example No. 20
def main():
    args = arguments()
    outdir = os.path.join(args.out, dt.now().strftime('%m%d_%H%M') + "_cgan")

    #    chainer.config.type_check = False
    chainer.config.autotune = True
    chainer.config.dtype = dtypes[args.dtype]
    chainer.print_runtime_info()
    #print('Chainer version: ', chainer.__version__)
    #print('GPU availability:', chainer.cuda.available)
    #print('cuDNN availability:', chainer.cuda.cudnn_enabled)
    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()

    ## dataset preparation
    train_d = Dataset(args.train,
                      args.root,
                      args.from_col,
                      args.to_col,
                      clipA=args.clipA,
                      clipB=args.clipB,
                      class_num=args.class_num,
                      crop=(args.crop_height, args.crop_width),
                      imgtype=args.imgtype,
                      random=args.random_translate,
                      grey=args.grey,
                      BtoA=args.btoa)
    test_d = Dataset(args.val,
                     args.root,
                     args.from_col,
                     args.to_col,
                     clipA=args.clipA,
                     clipB=args.clipB,
                     class_num=args.class_num,
                     crop=(args.crop_height, args.crop_width),
                     imgtype=args.imgtype,
                     random=args.random_translate,
                     grey=args.grey,
                     BtoA=args.btoa)
    args.crop_height, args.crop_width = train_d.crop
    if (len(train_d) == 0):
        print("No images found!")
        exit()

    # setup training/validation data iterators
    train_iter = chainer.iterators.SerialIterator(train_d, args.batch_size)
    test_iter = chainer.iterators.SerialIterator(test_d,
                                                 args.nvis,
                                                 shuffle=False)
    test_iter_gt = chainer.iterators.SerialIterator(
        train_d, args.nvis,
        shuffle=False)  ## same as training data; used for validation

    args.ch = len(train_d[0][0])
    args.out_ch = len(train_d[0][1])
    print("Input channels {}, Output channels {}".format(args.ch, args.out_ch))
    if (len(train_d) * len(test_d) == 0):
        print("No images found!")
        exit()

    ## Set up models
    # shared pretrained layer
    if (args.gen_pretrained_encoder and args.gen_pretrained_lr_ratio == 0):
        if "resnet" in args.gen_pretrained_encoder:
            pretrained = L.ResNet50Layers()
            print("Pretrained ResNet model loaded.")
        else:
            pretrained = L.VGG16Layers()
            print("Pretrained VGG model loaded.")
        if args.gpu >= 0:
            pretrained.to_gpu()
        enc_x = net.Encoder(args, pretrained)
    else:
        enc_x = net.Encoder(args)


    # gen = net.Generator(args)
    dec_y = net.Decoder(args)

    if args.lambda_dis > 0:
        dis = net.Discriminator(args)
        models = {'enc_x': enc_x, 'dec_y': dec_y, 'dis': dis}
    else:
        dis = L.Linear(1, 1)
        models = {'enc_x': enc_x, 'dec_y': dec_y}

    ## load learnt models
    optimiser_files = []
    if args.model_gen:
        serializers.load_npz(args.model_gen, enc_x)
        serializers.load_npz(args.model_gen.replace('enc_x', 'dec_y'), dec_y)
        print('model loaded: {}, {}'.format(
            args.model_gen, args.model_gen.replace('enc_x', 'dec_y')))
        optimiser_files.append(args.model_gen.replace('enc_x', 'opt_enc_x'))
        optimiser_files.append(args.model_gen.replace('enc_x', 'opt_dec_y'))
    if args.model_dis:
        serializers.load_npz(args.model_dis, dis)
        print('model loaded: {}'.format(args.model_dis))
        optimiser_files.append(args.model_dis.replace('dis', 'opt_dis'))

    ## send models to GPU
    if args.gpu >= 0:
        enc_x.to_gpu()
        dec_y.to_gpu()
        dis.to_gpu()

    # Setup optimisers
    def make_optimizer(model, lr, opttype='Adam', pretrained_lr_ratio=1.0):
        #        eps = 1e-5 if args.dtype==np.float16 else 1e-8
        optimizer = optim[opttype](lr)
        optimizer.setup(model)
        if args.weight_decay > 0:
            if opttype in ['Adam', 'AdaBound', 'Eve']:
                optimizer.weight_decay_rate = args.weight_decay
            else:
                if args.weight_decay_norm == 'l2':
                    optimizer.add_hook(
                        chainer.optimizer.WeightDecay(args.weight_decay))
                else:
                    optimizer.add_hook(
                        chainer.optimizer_hooks.Lasso(args.weight_decay))
        return optimizer

    opt_enc_x = make_optimizer(enc_x, args.learning_rate_gen, args.optimizer)
    opt_dec_y = make_optimizer(dec_y, args.learning_rate_gen, args.optimizer)
    opt_dis = make_optimizer(dis, args.learning_rate_dis, args.optimizer)

    optimizers = {'enc_x': opt_enc_x, 'dec_y': opt_dec_y, 'dis': opt_dis}

    ## resume optimisers from file
    if args.load_optimizer:
        for (m, e) in zip(optimiser_files, optimizers):
            if m:
                try:
                    serializers.load_npz(m, optimizers[e])
                    print('optimiser loaded: {}'.format(m))
                except Exception:
                    print("couldn't load {}".format(m))

    # finetuning
    if args.gen_pretrained_encoder:
        if args.gen_pretrained_lr_ratio == 0:
            enc_x.base.disable_update()
        else:
            for func_name in enc_x.encoder.base._children:
                for param in enc_x.encoder.base[func_name].params():
                    param.update_rule.hyperparam.eta *= args.gen_pretrained_lr_ratio

    # Set up trainer
    updater = Updater(
        models=(enc_x, dec_y, dis),
        iterator={'main': train_iter},
        optimizer=optimizers,
        #        converter=convert.ConcatWithAsyncTransfer(),
        params={'args': args},
        device=args.gpu)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=outdir)

    ## save learnt results at a specified interval or at the end of training
    if args.snapinterval < 0:
        args.snapinterval = args.epoch
    snapshot_interval = (args.snapinterval, 'epoch')
    display_interval = (args.display_interval, 'iteration')

    for e in models:
        trainer.extend(extensions.snapshot_object(models[e],
                                                  e + '{.updater.epoch}.npz'),
                       trigger=snapshot_interval)
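        # Optionally record statistics for every parameter of each model;
        # computing them over all parameters each time the extension fires is
        # expensive, hence the 'very slow' note below.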
        if args.parameter_statistics:
            trainer.extend(extensions.ParameterStatistics(
                models[e]))  ## very slow
    for e in optimizers:
        trainer.extend(extensions.snapshot_object(
            optimizers[e], 'opt_' + e + '{.updater.epoch}.npz'),
                       trigger=snapshot_interval)

    ## plot NN graph
    if args.lambda_rec_l1 > 0:
        trainer.extend(
            extensions.dump_graph('dec_y/loss_L1', out_name='enc.dot'))
    elif args.lambda_rec_l2 > 0:
        trainer.extend(
            extensions.dump_graph('dec_y/loss_L2', out_name='gen.dot'))
    elif args.lambda_rec_ce > 0:
        trainer.extend(
            extensions.dump_graph('dec_y/loss_CE', out_name='gen.dot'))
    if args.lambda_dis > 0:
        trainer.extend(
            extensions.dump_graph('dis/loss_real', out_name='dis.dot'))

    ## log outputs
    log_keys = ['epoch', 'iteration', 'lr']
    log_keys_gen = ['myval/loss_L1', 'myval/loss_L2']
    log_keys_dis = []
    if args.lambda_rec_l1 > 0:
        log_keys_gen.append('dec_y/loss_L1')
    if args.lambda_rec_l2 > 0:
        log_keys_gen.append('dec_y/loss_L2')
    if args.lambda_rec_ce > 0:
        log_keys_gen.extend(['dec_y/loss_CE', 'myval/loss_CE'])
    if args.lambda_reg > 0:
        log_keys.extend(['enc_x/loss_reg'])
    if args.lambda_tv > 0:
        log_keys_gen.append('dec_y/loss_tv')
    if args.lambda_dis > 0:
        log_keys_dis.extend(
            ['dec_y/loss_dis', 'dis/loss_real', 'dis/loss_fake'])
    if args.lambda_mispair > 0:
        log_keys_dis.append('dis/loss_mispair')
    if args.dis_wgan:
        log_keys_dis.extend(['dis/loss_gp'])
    trainer.extend(extensions.LogReport(trigger=display_interval))
    trainer.extend(extensions.PrintReport(log_keys + log_keys_gen +
                                          log_keys_dis),
                   trigger=display_interval)
    if extensions.PlotReport.available():
        #        trainer.extend(extensions.PlotReport(['lr'], 'iteration',trigger=display_interval, file_name='lr.png'))
        trainer.extend(
            extensions.PlotReport(log_keys_gen,
                                  'iteration',
                                  trigger=display_interval,
                                  file_name='loss_gen.png',
                                  postprocess=plot_log))
        trainer.extend(
            extensions.PlotReport(log_keys_dis,
                                  'iteration',
                                  trigger=display_interval,
                                  file_name='loss_dis.png'))
    trainer.extend(extensions.ProgressBar(update_interval=10))

    # learning rate scheduling
    trainer.extend(extensions.observe_lr(optimizer_name='enc_x'),
                   trigger=display_interval)
    if args.optimizer in ['Adam', 'AdaBound', 'Eve']:
        lr_target = 'eta'
    else:
        lr_target = 'lr'
    if args.lr_drop > 0:  ## cosine annealing
        for e in [opt_enc_x, opt_dec_y, opt_dis]:
            trainer.extend(CosineShift(lr_target,
                                       args.epoch // args.lr_drop,
                                       optimizer=e),
                           trigger=(1, 'epoch'))
    else:
        for e in [opt_enc_x, opt_dec_y, opt_dis]:
            #trainer.extend(extensions.LinearShift('eta', (1.0,0.0), (decay_start_iter,decay_end_iter), optimizer=e))
            trainer.extend(extensions.ExponentialShift('lr', 0.33,
                                                       optimizer=e),
                           trigger=(args.epoch // args.lr_drop, 'epoch'))

    # evaluation
    vis_folder = os.path.join(outdir, "vis")
    os.makedirs(vis_folder, exist_ok=True)
    if not args.vis_freq:
        args.vis_freq = max(len(train_d) // 2, 50)
    trainer.extend(VisEvaluator({
        "test": test_iter,
        "train": test_iter_gt
    }, {
        "enc_x": enc_x,
        "dec_y": dec_y
    },
                                params={
                                    'vis_out': vis_folder,
                                    'args': args
                                },
                                device=args.gpu),
                   trigger=(args.vis_freq, 'iteration'))

    # ChainerUI: allow controlling the run (e.g. taking snapshots) from the ChainerUI web UI
    trainer.extend(CommandsExtension())

    # Run the training
    print("\nresults are saved under: ", outdir)
    save_args(args, outdir)
    trainer.run()
Example No. 21
    def set_event_handler(self):

        self.set_target()

        # (Not Implemented)Evaluator(train)
        self.trainer.extend(extensions.Evaluator(
            self.valid_loader,
            self.target,
            converter=self.converter,
            device=self.device,
        ),
                            trigger=(self.eval_interval, 'epoch'),
                            call_before_training=self.call_before_training)

        self.trainer.extend(extensions.ProgressBar())

        self.trainer.extend(extensions.observe_lr())

        # self.trainer.extend(extensions.MicroAverage('loss', 'lr', 'mav'))

        self.trainer.extend(extensions.LogReport(trigger=(self.log_interval,
                                                          'epoch')),
                            call_before_training=self.call_before_training)

        self.trainer.extend(extensions.FailOnNonNumber())

        # self.trainer.extend(extensions.ExponentialShift('lr', rate=0.9))
        self.trainer.extend(
            extensions.ExponentialShift('lr', rate=0.99, init=self.lr * 10.0))
        # (Not Implemented)InverseShift
        # (Not Implemented)LinearShift
        # (Not Implemented)MultistepShift
        # (Not Implemented)PolynomialShift
        # (Not Implemented)StepShift
        # (Not Implemented)WarmupShift

        self.trainer.extend(
            extensions.ParameterStatistics(self.model,
                                           trigger=(self.eval_interval,
                                                    'epoch')))

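        # VariableStatisticsPlot collects statistics (such as the mean and
        # percentiles) of the model's parameters and writes them as a plot
        # image into the trainer's output directory; matplotlib is assumed
        # to be available.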
        self.trainer.extend(extensions.VariableStatisticsPlot(self.model))

        self.trainer.extend(extensions.PrintReport([
            'epoch', 'main/loss', 'main/accuracy', 'validation/main/loss',
            'validation/main/accuracy', 'elapsed_time'
        ]),
                            call_before_training=self.call_before_training)

        self.trainer.extend(extensions.PlotReport(
            ['main/loss', 'validation/main/loss'],
            'epoch',
            file_name='loss.png'),
                            call_before_training=self.call_before_training)
        self.trainer.extend(extensions.PlotReport(
            ['main/accuracy', 'validation/main/accuracy'],
            'epoch',
            file_name='accuracy.png'),
                            call_before_training=self.call_before_training)

        self.trainer.extend(extensions.snapshot(n_retains=self.retain_num),
                            trigger=(self.log_interval, 'epoch'))

        self.set_additonal_event_handler()
Example No. 22
from chainer import optimizers
from chainer import training

# Create the network
predictor = MLP()

# Wrap the network in L.Classifier so that loss computation etc. are included in the model
net = L.Classifier(predictor)

# Choose an optimization method, create the optimizer, and give it the network to be optimized
optimizer = optimizers.MomentumSGD(lr=0.1).setup(net)

# Pass the iterator and the optimizer to the updater
updater = training.StandardUpdater(train_iter, optimizer, device=-1)  # device=-1 runs the computation on the CPU

trainer = training.Trainer(updater, (30, 'epoch'), out='results/iris_result1')

from chainer.training import extensions

trainer.extend(extensions.LogReport(trigger=(1, 'epoch'), log_name='log'))
trainer.extend(extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'))
trainer.extend(extensions.dump_graph('main/loss'))
trainer.extend(extensions.Evaluator(valid_iter, net, device=-1), name='val')
trainer.extend(extensions.PrintReport(['epoch', 'iteration', 'main/loss', 'main/accuracy', 'val/main/loss', 'val/main/accuracy', 'fc1/W/data/mean', 'elapsed_time']))
trainer.extend(extensions.PlotReport(['fc1/W/grad/mean'], x_key='epoch', file_name='mean.png'))
trainer.extend(extensions.PlotReport(['main/loss', 'val/main/loss'], x_key='epoch', file_name='loss.png'))
trainer.extend(extensions.PlotReport(['main/accuracy', 'val/main/accuracy'], x_key='epoch', file_name='accuracy.png'))
trainer.extend(extensions.ParameterStatistics(net.predictor.fc1, {'mean': np.mean}, report_grads=True))

trainer.run()
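
# A minimal sketch (not part of the original example) for inspecting the logged
# statistic afterwards: LogReport writes a JSON file named 'log' under the out
# directory, and the mean registered by ParameterStatistics appears there under
# the key 'fc1/W/data/mean'.
import json

with open('results/iris_result1/log') as f:
    for entry in json.load(f):
        print(entry['epoch'], entry.get('fc1/W/data/mean'))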
Example No. 23
model = TripletLossClassifier(model, lossfun=F.triplet)
optimizer = optimizers.SGD(lr=0.01).setup(model)
updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)
trainer = training.Trainer(updater, (max_epoch, 'epoch'), out='mnist_result')

# Extensions for trainer
trainer.extend(extensions.LogReport())
trainer.extend(extensions.Evaluator(valid_iter, model, device=gpu_id),
               name='val')
trainer.extend(
    extensions.PrintReport([
        'epoch', 'main/loss', 'main/accuracy', 'val/main/loss',
        'val/main/accuracy', 'l1/W/data/std', 'elapsed_time'
    ]))
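# The std registered below is reported as 'l1/W/data/std', which is the key
# referenced in the PrintReport list above.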
trainer.extend(
    extensions.ParameterStatistics(model.predictor.l1, {'std': np.std}))
trainer.extend(
    extensions.PlotReport(['main/loss', 'val/main/loss'],
                          x_key='epoch',
                          file_name='loss.png'))
trainer.extend(
    extensions.PlotReport(['main/accuracy', 'val/main/accuracy'],
                          x_key='epoch',
                          file_name='accuracy.png'))

# Run
trainer.run()

# Evaluate w/ test data
test_evaluator = extensions.Evaluator(test_iter, model, device=gpu_id)
results = test_evaluator()
Example No. 24
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--config_path',
                        type=str,
                        default='configs/base.yml',
                        help='path to config file')
    parser.add_argument('--gpu',
                        type=int,
                        default=0,
                        help='index of gpu to be used')
    parser.add_argument('--input_dir', type=str, default='./data/imagenet')
    parser.add_argument('--truth_dir', type=str, default='./data/imagenet')
    parser.add_argument('--results_dir',
                        type=str,
                        default='./results/gans',
                        help='directory to save the results to')
    parser.add_argument('--snapshot',
                        type=str,
                        default='',
                        help='path to the snapshot file to use')
    parser.add_argument('--enc_model',
                        type=str,
                        default='',
                        help='path to the encoder .npz file')
    parser.add_argument('--gen_model',
                        type=str,
                        default='',
                        help='path to the generator .npz file')
    parser.add_argument('--dis_model',
                        type=str,
                        default='',
                        help='path to the discriminator .npz file')
    parser.add_argument('--loaderjob',
                        type=int,
                        help='number of parallel data loading processes')

    args = parser.parse_args()
    config = yaml_utils.Config(yaml.safe_load(open(args.config_path)))
    chainer.cuda.get_device_from_id(args.gpu).use()
    gen, dis, enc = load_models(config)

    chainer.serializers.load_npz(args.gen_model, gen, strict=False)
    chainer.serializers.load_npz(args.dis_model, dis)
    chainer.serializers.load_npz(args.enc_model, enc)

    gen.to_gpu(device=args.gpu)
    dis.to_gpu(device=args.gpu)
    enc.to_gpu(device=args.gpu)
    models = {"gen": gen, "dis": dis, "enc": enc}
    opt_gen = make_optimizer(gen,
                             alpha=config.adam['alpha'],
                             beta1=config.adam['beta1'],
                             beta2=config.adam['beta2'])
    opt_gen.add_hook(chainer.optimizer.WeightDecay(config.weight_decay))
    opt_gen.add_hook(chainer.optimizer.GradientClipping(config.grad_clip))

    # disable update of pre-trained weights
    layers_to_train = ['lA1', 'lA2', 'lB1', 'lB2', 'preluW', 'preluMiddleW']
    for layer in gen.children():
        if not layer.name in layers_to_train:
            layer.disable_update()

    lmd_pixel = 0.05

    def fast_loss(out, gt):
        l1 = reconstruction_loss(dis, out, gt)
        l2 = lmd_pixel * pixel_loss(out, gt)
        loss = l1 + l2
        return loss

    gen.set_fast_loss(fast_loss)

    opts = {"opt_gen": opt_gen}

    # Dataset
    config['dataset']['args']['root_input'] = args.input_dir
    config['dataset']['args']['root_truth'] = args.truth_dir
    dataset = yaml_utils.load_dataset(config)
    # Iterator
    iterator = chainer.iterators.MultiprocessIterator(
        dataset, config.batchsize, n_processes=args.loaderjob)
    kwargs = config.updater['args'] if 'args' in config.updater else {}
    kwargs.update({
        'models': models,
        'iterator': iterator,
        'optimizer': opts,
    })
    updater = yaml_utils.load_updater_class(config)
    updater = updater(**kwargs)
    out = args.results_dir
    create_result_dir(out, args.config_path, config)
    trainer = training.Trainer(updater, (config.iteration, 'iteration'),
                               out=out)
    report_keys = [
        "loss_noab", "loss1", "loss2", "loss3", "fast_alpha", "loss_ae",
        "fast_benefit", "min_slope", "max_slope", "min_slope_middle",
        "max_slope_middle"
    ]
    # Set up logging
    trainer.extend(extensions.snapshot(),
                   trigger=(config.snapshot_interval, 'iteration'))
    for m in models.values():
        trainer.extend(extensions.snapshot_object(
            m, m.__class__.__name__ + '_{.updater.iteration}.npz'),
                       trigger=(config.snapshot_interval, 'iteration'))
    trainer.extend(
        extensions.LogReport(keys=report_keys,
                             trigger=(config.display_interval, 'iteration')))
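    # With the default statistics, ParameterStatistics reports a standard set
    # of values (mean, std, min, max, etc.) for every parameter of the
    # generator, keyed per link and parameter.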
    trainer.extend(extensions.ParameterStatistics(gen),
                   trigger=(config.display_interval, 'iteration'))
    trainer.extend(extensions.PrintReport(report_keys),
                   trigger=(config.display_interval, 'iteration'))

    trainer.extend(sample_reconstruction_auxab(enc,
                                               gen,
                                               out,
                                               n_classes=gen.n_classes),
                   trigger=(config.evaluation_interval, 'iteration'),
                   priority=extension.PRIORITY_WRITER)
    trainer.extend(
        extensions.ProgressBar(update_interval=config.progressbar_interval))
    ext_opt_gen = extensions.LinearShift(
        'alpha', (config.adam['alpha'], 0.),
        (config.iteration_decay_start, config.iteration), opt_gen)
    trainer.extend(ext_opt_gen)
    if args.snapshot:
        print("Resume training with snapshot:{}".format(args.snapshot))
        chainer.serializers.load_npz(args.snapshot, trainer)

    # Run the training
    print("start training")
    trainer.run()
Example No. 25

    # Wrap the network in Classifier so that loss computation etc. are included in the model
    net = L.Classifier(net, lossfun=F.softmax_cross_entropy, accfun=F.accuracy)

    # Choose the optimization method
    optimizer = optimizers.SGD(lr=0.01).setup(net)

    # Pass the Iterator and the Optimizer to the Updater
    updater = training.StandardUpdater(train_iter, optimizer, device=gpu_id)

    max_epoch = 10

    # Pass the Updater to the Trainer
    trainer = training.Trainer(
        updater, (max_epoch, 'epoch'), out='mnist_result')

    trainer.extend(extensions.LogReport())
    trainer.extend(extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'))
    trainer.extend(extensions.Evaluator(valid_iter, net, device=gpu_id), name='val')
    trainer.extend(extensions.PrintReport(['epoch', 'main/loss', 'main/accuracy', 'val/main/loss', 'val/main/accuracy', 'l1/W/data/std', 'elapsed_time']))
    trainer.extend(extensions.ParameterStatistics(net.predictor.l1, {'std': np.std}))
    trainer.extend(extensions.PlotReport(['l1/W/data/std'], x_key='epoch', file_name='std.png'))
    trainer.extend(extensions.PlotReport(['main/loss', 'val/main/loss'], x_key='epoch', file_name='loss.png'))
    trainer.extend(extensions.PlotReport(['main/accuracy', 'val/main/accuracy'], x_key='epoch', file_name='accuracy.png'))
    trainer.extend(extensions.dump_graph('main/loss'))

    trainer.run()

    chainer.serializers.save_npz('my_mnist.npz', net)
Example No. 26
max_epoch = 10

trainer = training.Trainer(updater, (max_epoch, 'epoch'),
                           out='werewolf_result_batchsize14_nofirstday_1hl')

trainer.extend(extensions.LogReport())
trainer.extend(extensions.snapshot(filename='snapshot_epoch-{.updater.epoch}'))
trainer.extend(extensions.Evaluator(valid_iter, network, device=gpu_id),
               name='val')
trainer.extend(
    extensions.PrintReport([
        'epoch', 'main/loss', 'main/accuracy', 'val/main/loss',
        'val/main/accuracy', 'l1/W/data/std', 'elapsed_time'
    ]))
trainer.extend(
    extensions.ParameterStatistics(network.predictor.l1, {'std': np.std}))
trainer.extend(
    extensions.PlotReport(['l1/W/data/std'],
                          x_key='epoch',
                          file_name='std.png'))
trainer.extend(
    extensions.PlotReport(['main/loss', 'val/main/loss'],
                          x_key='epoch',
                          file_name='loss.png'))
trainer.extend(
    extensions.PlotReport(['main/accuracy', 'val/main/accuracy'],
                          x_key='epoch',
                          file_name='accuracy.png'))
trainer.extend(extensions.dump_graph('main/loss'))

trainer.run()
Example No. 27
def run(epoch, frequency, gpu, out, word2vec, beer_train, beer_labels,
        beer_test, batchsize, negative_samples, ntopics, lr,
        orthogonality_penalty, fix_embedding, resume):
    if (beer_labels is None) != (beer_test is None):
        raise click.BadParameter(
            "Both or neither beer-labels and beer-test can be specified")

    if beer_train is None:
        logger.info('Using 20newsgroup dataset')
        memory = Memory(cachedir=out, verbose=1)
        w2v, vocab, train, test, topic_vectors, label_dict = \
            memory.cache(abae.dataset.prepare_20news)(word2vec, ntopics)
    else:
        logger.info('Using BeerAdvocate dataset.')
        memory = Memory(cachedir=out, verbose=1, mmap_mode='r')
        w2v, vocab, train, test, topic_vectors, label_dict = \
            memory.cache(abae.dataset.prepare_beer_advocate)(
                beer_train, beer_test, beer_labels, word2vec, ntopics)

    model = abae.model.ABAE(w2v.shape[0],
                            w2v.shape[1],
                            ntopics,
                            fix_embedding=fix_embedding,
                            orthogonality_penalty=orthogonality_penalty)
    model.initialize(w2v, topic_vectors)
    if gpu >= 0:
        # Make a specified GPU current
        chainer.cuda.get_device_from_id(gpu).use()
        model.to_gpu()  # Copy the model to the GPU

    # Setup an optimizer
    optimizer = chainer.optimizers.Adam(alpha=lr)
    optimizer.setup(model)

    train_iter = abae.iterator.NegativeSampleIterator(train, batchsize,
                                                      negative_samples)

    # Set up a trainer
    updater = training.StandardUpdater(
        train_iter,
        optimizer,
        device=gpu,
        converter=abae.iterator.concat_examples_ns)
    trainer = training.Trainer(updater, (epoch, 'epoch'), out=out)

    if test is not None:
        logger.info("train: {},  test: {}".format(len(train), len(test)))
        # Evaluate the model with the test dataset for each epoch
        test_iter = abae.iterator.NegativeSampleIterator(test,
                                                         batchsize,
                                                         negative_samples,
                                                         repeat=False,
                                                         shuffle=False)
        trainer.extend(extensions.Evaluator(
            test_iter,
            model,
            device=gpu,
            converter=abae.iterator.concat_examples_ns),
                       trigger=(500, 'iteration'))
        trainer.extend(abae.evaluator.TopicMatchEvaluator(
            test_iter,
            model,
            label_dict=label_dict,
            device=gpu,
            converter=abae.iterator.concat_examples_ns),
                       trigger=(500, 'iteration'))
    else:
        logger.info("train: {}".format(len(train)))

    logger.info("With labels: %s" % json.dumps(label_dict))
    # Take a snapshot for each specified epoch
    trigger = (epoch, 'epoch') if frequency == -1 else (frequency, 'iteration')
    trainer.extend(extensions.snapshot(), trigger=trigger)
    if gpu < 0:
        # ParameterStatistics does not work with GPU as of chainer 2.x
        # https://github.com/chainer/chainer/issues/3027
        trainer.extend(
            extensions.ParameterStatistics(model, trigger=(10, 'iteration')))

    # Write a log of evaluation statistics for each epoch
    trainer.extend(extensions.LogReport(trigger=(10, 'iteration')))

    # Print a progress bar to stdout
    trainer.extend(extensions.ProgressBar())

    if resume:
        # Resume from a snapshot
        chainer.serializers.load_npz(resume, trainer)

    # Run the training
    trainer.run()

    # Save final model (without trainer)
    model.save(os.path.join(out, 'trained_model'))
    with open(os.path.join(out, 'vocab.json'), 'w') as fout:
        json.dump(vocab, fout)