Example #1
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument("--model", default=None)
    parser.add_argument("--gpu", type=int, default=0)
    parser.add_argument("--batch_size", type=int, default=4)
    parser.add_argument("--data_dir", type=str, default="./datasets")
    parser.add_argument("--data_list", type=str, default="train.txt")
    parser.add_argument("--n_class", type=int, default=5)
    parser.add_argument("--n_steps", type=int, default=100)
    parser.add_argument("--snapshot_dir", type=str, default="./snapshots")
    parser.add_argument("--save_steps", type=int, default=50)
    args = parser.parse_args()
    print(args)

    if not os.path.exists(args.snapshot_dir):
        os.makedirs(args.snapshot_dir)

    model = RefineResNet(n_class=args.n_class)
    if args.model is not None:
        serializers.load_npz(args.model, model)

    if args.gpu >= 0:
        cuda.get_device(args.gpu).use()
        model.to_gpu()
        xp = cuda.cupy
    else:
        xp = np

    optimizer = Adam()
    #optimizer = MomentumSGD()
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-5), "hook_wd")

    train_dataset = ImageDataset(args.data_dir,
                                 args.data_list,
                                 crop_size=(320, 320))
    train_iterator = MultiprocessIterator(train_dataset,
                                          batch_size=args.batch_size,
                                          repeat=True,
                                          shuffle=True)

    step = 0
    for zipped_batch in train_iterator:
        step += 1
        x = Variable(xp.array([zipped[0] for zipped in zipped_batch]))
        y = Variable(
            xp.array([zipped[1] for zipped in zipped_batch], dtype=xp.int32))
        # Keep the prediction as a Variable so the loss stays connected to the
        # model; converting it to a bare array would cut off backpropagation.
        pred = model(x)
        loss = F.softmax_cross_entropy(pred, y)
        model.cleargrads()
        loss.backward()
        optimizer.update()

        print("Step: {}, Loss: {}".format(step, loss.data))
        if step % args.save_steps == 0:
            serializers.save_npz(
                os.path.join(args.snapshot_dir, "model_{}.npz".format(step)),
                model)

        if step >= args.n_steps:
            break
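
Example #1 registers WeightDecay as a single global hook on the optimizer; several later examples (e.g. #11 and #12) attach it per parameter through each parameter's update rule instead. A minimal sketch of both patterns, using a placeholder link and made-up decay rates rather than anything taken from the examples:

import chainer
from chainer.optimizer import WeightDecay

model = chainer.links.Linear(10, 2)  # placeholder model
optimizer = chainer.optimizers.MomentumSGD(lr=0.01, momentum=0.9)
optimizer.setup(model)

# Pattern A: one global hook, the same decay rate for every parameter.
optimizer.add_hook(WeightDecay(1e-4))

# Pattern B (use instead of A): per-parameter hooks, e.g. to exempt biases.
# for param in model.params():
#     if param.name != 'b':
#         param.update_rule.add_hook(WeightDecay(1e-4))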
Example #2
def train(args):
    model = PeepHoleJaLSTMParser(args.model, args.word_emb_size, args.char_emb_size,
            args.nlayers, args.hidden_dim, args.relu_dim, args.dep_dim, args.dropout_ratio)

    with open(args.model + "/params", "w") as f:
        log(args, f)

    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()


    converter = lambda x, device: \
            concat_examples(x, device=device, padding=-1)

    train = LSTMParserDataset(args.model, args.train)
    train_iter = SerialIterator(train, args.batchsize)
    val = LSTMParserDataset(args.model, args.val)
    val_iter = SerialIterator(
            val, args.batchsize, repeat=False, shuffle=False)
    optimizer = chainer.optimizers.Adam(beta2=0.9)
    # optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    # optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu, converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 2000, 'iteration'
    log_interval = 200, 'iteration'

    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(val_iter, eval_model,
                    converter, device=args.gpu), trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'), trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/tagging_loss',
        'main/tagging_accuracy',
        'main/parsing_accuracy', 'main/parsing_loss',
        'validation/main/tagging_accuracy',
        'validation/main/parsing_accuracy'
    ]), trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Example #3
def train(args):
    model = LSTMTagger(args.model, args.word_emb_size, args.char_emb_size,
                       args.nlayers, args.hidden_dim, args.relu_dim,
                       args.dropout_ratio)
    with open(args.model + "/params", "w") as f:
        log(args, f)
    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    train = LSTMTaggerDataset(args.model, args.train)
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val = LSTMTaggerDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(val,
                                                args.batchsize,
                                                repeat=False,
                                                shuffle=False)
    optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(1e-6))
    # optimizer.add_hook(GradientClipping(5.))
    updater = training.StandardUpdater(train_iter,
                                       optimizer,
                                       converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.Evaluator(val_iter, eval_model, converter),
                   trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.PrintReport([
        'epoch',
        'iteration',
        'main/loss',
        'validation/main/loss',
        'main/accuracy',
        'validation/main/accuracy',
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Example #4
def apply_weightdecay_only_w(model, rate):
    """
    use after setup optimizer
    """

    if hasattr(model, 'params'):
        for p in model.params():
            if p.name == 'W':
                if hasattr(p.update_rule.hyperparam, 'weight_decay_rate'):
                    p.update_rule.hyperparam.weight_decay_rate = rate
                else:
                    p.update_rule.add_hook(WeightDecay(rate))
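
A short usage sketch for the helper above; the Adam optimizer, the toy Linear link and the 1e-4 rate are illustrative assumptions, not taken from the source:

import chainer
from chainer import optimizers

model = chainer.links.Linear(4, 3)     # toy stand-in for a real model
optimizer = optimizers.Adam()
optimizer.setup(model)                 # attach update rules first
apply_weightdecay_only_w(model, 1e-4)  # decay only the parameters named 'W'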
Example #5
    def __call__(self, model, train, test, out_dir,
                 optname='SGD', lr=1.0, rate=0.9, weighting=False,
                 gpu=-1, bsize=64, test_bsize=10, esize=50, mname=None,
                 progress=True, lr_attr='lr', l2=0.0,
                 keys=['main/loss', 'validation/main/loss', 'main/accuracy',
                       'validation/main/accuracy', 'validation_in_mca/main/mca'],
                 s_keys=['validation/main/loss', 'validation/main/accuracy',
                         'validation_in_mca/main/mca'],
                 p_keys=['epoch', 'main/loss', 'validation/main/loss',
                         'main/accuracy', 'validation/main/accuracy',
                         'validation_in_mca/main/mca', 'elapsed_time']):

        train_iter = custom_iterator(train, batch_size=bsize)
        test_iter = custom_iterator(test, batch_size=test_bsize,
                                    repeat=False, shuffle=False)

        if weighting:
            label_cnt = train_iter.get_label_cnt()
            n_cls = len(label_cnt.keys())
            cls_weight = numpy.empty(n_cls)
            for k, cnt in six.iteritems(label_cnt):
                cls_weight[k] = cnt
            cls_weight = (cls_weight.sum() / cls_weight / n_cls).astype(numpy.float32)
        else:
            cls_weight = None
        if gpu >= 0:
            cuda.get_device(gpu).use()
            model.to_gpu()
            if cls_weight is not None:
                cls_weight = cuda.to_gpu(cls_weight)
        model.cls_weight = cls_weight

        optimizer = self.optimizers[optname](lr)
        optimizer.setup(model)
        if l2 > 0:
            optimizer.add_hook(WeightDecay(l2))

        updater = training.StandardUpdater(train_iter, optimizer, device=gpu)

        trainer = training.Trainer(updater, (esize, 'epoch'), out=out_dir)
        trainer.extend(testmode_evaluator(test_iter, model, device=gpu))
        trainer.extend(mca_evaluator(test_iter, model, device=gpu))
        trainer.extend(extensions.LogReport())
        trainer.extend(scale_lr(attr=lr_attr, rate=rate))
        trainer.extend(best_scoring(model, keys, s_keys=s_keys, mname=mname))
        if progress:
            trainer.extend(extensions.PrintReport(p_keys))
            trainer.extend(extensions.ProgressBar())
        trainer.run()
        return model.predictor.best_score
Example #6
def _setup_optimizer(config, model, comm):
    optimizer_name = config['optimizer']
    lr = float(config['init_lr'])
    weight_decay = float(config['weight_decay'])
    if optimizer_name == 'Adam':
        optimizer = Adam(alpha=lr, weight_decay_rate=weight_decay)
    elif optimizer_name in \
            ('SGD', 'MomentumSGD', 'CorrectedMomentumSGD', 'RMSprop'):
        optimizer = eval(optimizer_name)(lr=lr)
        if weight_decay > 0.:
            optimizer.add_hook(WeightDecay(weight_decay))
    else:
        raise ValueError('Invalid optimizer: {}'.format(optimizer_name))
    if comm is not None:
        optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(model)

    return optimizer
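
The _setup_optimizer helper above reads three keys from its config mapping, and the Adam branch passes weight_decay_rate directly to the optimizer instead of adding a WeightDecay hook. A hypothetical single-node configuration, with made-up values:

config = {
    'optimizer': 'MomentumSGD',  # or 'SGD', 'CorrectedMomentumSGD', 'RMSprop', 'Adam'
    'init_lr': 0.1,
    'weight_decay': 1e-4,        # 0 disables the WeightDecay hook for the SGD family
}
# optimizer = _setup_optimizer(config, model, comm=None)  # comm=None skips chainermn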
Example #7
    def train(self, trainsents, testsents, parserfile):
        classifier = L.Classifier(self.model)

        if self.gpu:
            cuda.get_device().use()
            classifier = classifier.to_gpu()

        trainexamples = self.gen_trainexamples(trainsents)

        optimizer = O.AdaGrad(.01, 1e-6)
        optimizer.setup(classifier)
        optimizer.add_hook(WeightDecay(1e-8))
        # optimizer = O.MomentumSGD(.05, .9)
        # optimizer.setup(classifier)
        # optimizer.add_hook(WeightDecay(1e-4))
        # optimizer.add_hook(ExponentialMovingAverage())

        best_uas = 0.
        print >> sys.stderr, "will run {} iterations".format(self.niters)
        for step in range(1, self.niters + 1):
            batch = random.sample(trainexamples, self.batchsize)
            t = Variable(xp.concatenate(map(lambda ex: ex.target, batch)))
            optimizer.update(classifier, batch, t)
            if type(self.model) == WeightAveragedFF:
                self.model.update_averaged(step)

            print >> sys.stderr, "Epoch:{}\tloss:{}\tacc:{}".format(
                step, classifier.loss.data, classifier.accuracy.data)

            if step % self.evaliter == 0:
                print >> sys.stderr, "Evaluating model on dev data..."
                res = self(testsents)
                uas, las = accuracy(res)

                if uas > best_uas:
                    print >> sys.stderr, "Best score. Saving parser...",
                    self.save(parserfile)
                    best_uas = uas
                    print >> sys.stderr, "done"

        self.save(parserfile + ".final")
        print >> sys.stderr, "done"
Example #8
File: nn.py Project: ssethia2/ast
    def init_optimizer(self, opt_cfg):
        print("Setting up optimizer")
        if opt_cfg['type'] == _ADAM:
            print("using ADAM")
            self.optimizer = optimizers.Adam(alpha=opt_cfg['lr'],
                                             beta1=0.9,
                                             beta2=0.999,
                                             eps=1e-08,
                                             amsgrad=True)
        else:
            print("using SGD")
            self.optimizer = optimizers.SGD(lr=opt_cfg['lr'])
        print("learning rate: {0:f}".format(opt_cfg['lr']))
        # Attach optimizer
        self.optimizer.setup(self.model)

        # Add Weight decay
        if opt_cfg['l2'] > 0:
            print("Adding WeightDecay: {0:f}".format(opt_cfg['l2']))
            self.optimizer.add_hook(WeightDecay(opt_cfg['l2']))

        # Gradient clipping
        print("Clipping gradients at: {0:d}".format(opt_cfg['grad_clip']))
        self.optimizer.add_hook(
            GradientClipping(threshold=opt_cfg['grad_clip']))

        # Gradient noise
        if opt_cfg['grad_noise_eta'] > 0:
            print("Adding gradient noise: {0:f}".format(
                opt_cfg['grad_noise_eta']))
            self.optimizer.add_hook(
                chainer.optimizer.GradientNoise(eta=opt_cfg['grad_noise_eta']))

        # Freeze weights
        for l in opt_cfg['freeze']:
            if l in self.model.__dict__:
                print("freezing: {0:s}".format(l))
                self.model[l].disable_update()
            else:
                print("layer {0:s} not in model".format(l))
Example #9
from config.parseval_svhn.base import *
from src.model.classifier import LMTraining
from chainer.optimizer import WeightDecay

mode = ['default']
model = LMTraining(predictor, preprocess)
hook = [WeightDecay(5e-4)]
Example #10
def training(args):
    source = EventField(fix_length=args.event_size, embed_size=args.src_embed)
    mask_flag = 'tmpl' in args.net
    sentence_size = args.sentence_size if args.truncate else None
    reverse_decode = args.reverse_decode
    if 'disc' in args.net:
        target = TextAndContentWordField(start_token=None,
                                         fix_length=sentence_size,
                                         mask_player=mask_flag,
                                         mask_team=mask_flag,
                                         numbering=args.numbering,
                                         reverse=reverse_decode,
                                         bpc=args.bpc,
                                         multi_tag=args.multi_tag)
    else:
        target = TextField(start_token=None,
                           fix_length=sentence_size,
                           mask_player=mask_flag,
                           mask_team=mask_flag,
                           numbering=args.numbering,
                           reverse=reverse_decode,
                           bpc=args.bpc,
                           multi_tag=args.multi_tag)
    if args.truncate:
        train = OptaDataset(path=args.dataset + '.train',
                            fields={
                                'source': source,
                                'target': target
                            })
    else:
        train = OptaDataset(path=args.dataset + '.train',
                            fields={
                                'source': source,
                                'target': target
                            },
                            limit_length=args.limit)
    source.build_vocabulary(train.source)
    target.build_vocabulary(train.target, size=args.vocab_size)
    target.player_to_id = source.player_to_id
    target.players = source.id_to_player
    if mask_flag or 'disc' in args.net:
        content_word_to_id = getattr(target, 'content_word_to_id', None)
        target_test = TestTextField(source.id_to_player,
                                    source.id_to_team,
                                    target.word_to_id,
                                    content_word_to_id,
                                    target.unk_id,
                                    fix_length=None,
                                    bpc=args.bpc)
    else:
        target_test = TextField(start_token=None,
                                end_token=None,
                                fix_length=None,
                                bpc=args.bpc)
        target_test.word_to_id = target.word_to_id
        target_test.id_to_word = target.id_to_word
        target_test.unk_id = target.unk_id
    dev = OptaDataset(path=args.dataset + '.dev',
                      fields={
                          'source': source,
                          'target': target_test
                      },
                      limit_length=args.limit)
    train2 = OptaDataset(path=args.dataset + '.train',
                         fields={
                             'source': source,
                             'target': target_test
                         },
                         limit_length=args.limit)
    test = OptaDataset(path=args.dataset + '.test',
                       fields={
                           'source': source,
                           'target': target_test
                       })
    test20 = OptaDataset(path=args.dataset + '.test',
                         fields={
                             'source': source,
                             'target': target_test
                         },
                         limit_length=20)
    test15 = OptaDataset(path=args.dataset + '.test',
                         fields={
                             'source': source,
                             'target': target_test
                         },
                         limit_length=15)
    test10 = OptaDataset(path=args.dataset + '.test',
                         fields={
                             'source': source,
                             'target': target_test
                         },
                         limit_length=10)

    start_id, end_id = target.word_to_id['<s>'], target.word_to_id['</s>']
    class_weight = compute_class_weight('./dataset/player_list.txt',
                                        target.word_to_id,
                                        args.class_weight[0],
                                        args.class_weight[1],
                                        gpu=args.gpu)
    dirname = Utility.get_save_directory(
        args.net, './debug' if args.debug else args.output)
    if args.debug:
        save_path = os.path.join('./debug', dirname)
    else:
        save_path = os.path.join(args.output, dirname)
    Utility.make_directory(save_path)
    del args.vocab_size
    setting = {
        'vocab_size': len(target.word_to_id),
        'type_size': len(source.type_to_id),
        'player_size': len(source.player_to_id),
        'team_size': len(source.team_to_id),
        'detail_size': len(source.detail_to_id),
        'detail_dim': source.details_dimention,
        'start_id': start_id,
        'end_id': end_id,
        'unk_id': target.unk_id,
        'save_path': save_path,
        **vars(args)
    }
    dump_setting(setting, os.path.join(save_path, 'setting.yaml'))
    home_player_tag = target.word_to_id.get(target.home_player_tag)
    away_player_tag = target.word_to_id.get(target.away_player_tag)
    home_team_tag = target.word_to_id.get(target.home_team_tag)
    away_team_tag = target.word_to_id.get(target.away_team_tag)
    print('vocab size: {}'.format(len(target.word_to_id)))
    if args.net == 'plain':
        model = MLPEncoder2AttentionDecoder(len(source.type_to_id),
                                            len(source.player_to_id),
                                            len(source.team_to_id),
                                            len(source.detail_to_id),
                                            source.details_dimention,
                                            args.src_embed,
                                            args.event_size,
                                            len(target.word_to_id),
                                            args.trg_embed,
                                            args.hidden,
                                            start_id,
                                            end_id,
                                            class_weight,
                                            args.mlp_layers,
                                            args.max_length,
                                            args.dropout,
                                            IGNORE_LABEL,
                                            reverse_decode=reverse_decode)
    elif args.net == 'tmpl':
        model = MLPEncoder2AttentionDecoder(len(source.type_to_id),
                                            len(source.player_to_id),
                                            len(source.team_to_id),
                                            len(source.detail_to_id),
                                            source.details_dimention,
                                            args.src_embed,
                                            args.event_size,
                                            len(target.word_to_id),
                                            args.trg_embed,
                                            args.hidden,
                                            start_id,
                                            end_id,
                                            class_weight,
                                            args.mlp_layers,
                                            args.max_length,
                                            args.dropout,
                                            IGNORE_LABEL,
                                            source.id_to_player,
                                            home_player_tag,
                                            away_player_tag,
                                            source.id_to_team,
                                            home_team_tag,
                                            away_team_tag,
                                            target.player_to_id,
                                            target.players,
                                            reverse_decode=reverse_decode)
    elif args.net == 'gate':
        model = MLPEncoder2GatedAttentionDecoder(len(source.type_to_id),
                                                 len(source.player_to_id),
                                                 len(source.team_to_id),
                                                 len(source.detail_to_id),
                                                 source.details_dimention,
                                                 args.src_embed,
                                                 args.event_size,
                                                 len(target.word_to_id),
                                                 args.trg_embed,
                                                 args.hidden,
                                                 start_id,
                                                 end_id,
                                                 class_weight,
                                                 args.mlp_layers,
                                                 args.max_length,
                                                 args.dropout,
                                                 IGNORE_LABEL,
                                                 reverse_decode=reverse_decode)
    elif args.net == 'gate-tmpl':
        model = MLPEncoder2GatedAttentionDecoder(len(source.type_to_id),
                                                 len(source.player_to_id),
                                                 len(source.team_to_id),
                                                 len(source.detail_to_id),
                                                 source.details_dimention,
                                                 args.src_embed,
                                                 args.event_size,
                                                 len(target.word_to_id),
                                                 args.trg_embed,
                                                 args.hidden,
                                                 start_id,
                                                 end_id,
                                                 class_weight,
                                                 args.mlp_layers,
                                                 args.max_length,
                                                 args.dropout,
                                                 IGNORE_LABEL,
                                                 source.id_to_player,
                                                 home_player_tag,
                                                 away_player_tag,
                                                 source.id_to_team,
                                                 home_team_tag,
                                                 away_team_tag,
                                                 target.player_to_id,
                                                 target.players,
                                                 reverse_decode=reverse_decode)
    elif args.net == 'disc':
        model = DiscriminativeMLPEncoder2AttentionDecoder(
            len(source.type_to_id),
            len(source.player_to_id),
            len(source.team_to_id),
            len(source.detail_to_id),
            source.details_dimention,
            args.src_embed,
            args.event_size,
            len(target.word_to_id),
            len(target.content_word_to_id),
            args.trg_embed,
            args.hidden,
            start_id,
            end_id,
            class_weight,
            args.loss_weight,
            args.disc_loss,
            args.loss_func,
            args.mlp_layers,
            args.max_length,
            args.dropout,
            IGNORE_LABEL,
            reverse_decode=reverse_decode)
    elif args.net == 'disc-tmpl':
        model = DiscriminativeMLPEncoder2AttentionDecoder(
            len(source.type_to_id),
            len(source.player_to_id),
            len(source.team_to_id),
            len(source.detail_to_id),
            source.details_dimention,
            args.src_embed,
            args.event_size,
            len(target.word_to_id),
            len(target.content_word_to_id),
            args.trg_embed,
            args.hidden,
            start_id,
            end_id,
            class_weight,
            args.loss_weight,
            args.disc_loss,
            args.loss_func,
            args.mlp_layers,
            args.max_length,
            args.dropout,
            IGNORE_LABEL,
            source.id_to_player,
            home_player_tag,
            away_player_tag,
            source.id_to_team,
            home_team_tag,
            away_team_tag,
            target.player_to_id,
            target.players,
            reverse_decode=reverse_decode)
    elif args.net == 'gate-disc':
        model = DiscriminativeMLPEncoder2GatedAttentionDecoder(
            len(source.type_to_id),
            len(source.player_to_id),
            len(source.team_to_id),
            len(source.detail_to_id),
            source.details_dimention,
            args.src_embed,
            args.event_size,
            len(target.word_to_id),
            len(target.content_word_to_id),
            args.trg_embed,
            args.hidden,
            start_id,
            end_id,
            class_weight,
            args.loss_weight,
            args.disc_loss,
            args.loss_func,
            args.mlp_layers,
            args.max_length,
            args.dropout,
            IGNORE_LABEL,
            reverse_decode=reverse_decode)
    elif args.net == 'gate-disc-tmpl':
        model = DiscriminativeMLPEncoder2GatedAttentionDecoder(
            len(source.type_to_id),
            len(source.player_to_id),
            len(source.team_to_id),
            len(source.detail_to_id),
            source.details_dimention,
            args.src_embed,
            args.event_size,
            len(target.word_to_id),
            len(target.content_word_to_id),
            args.trg_embed,
            args.hidden,
            start_id,
            end_id,
            class_weight,
            args.loss_weight,
            args.disc_loss,
            args.loss_func,
            args.mlp_layers,
            args.max_length,
            args.dropout,
            IGNORE_LABEL,
            source.id_to_player,
            home_player_tag,
            away_player_tag,
            source.id_to_team,
            home_team_tag,
            away_team_tag,
            target.player_to_id,
            target.players,
            reverse_decode=reverse_decode)
    elif args.net == 'conv-gate-disc-tmpl':
        model = DiscriminativeGLUEncoder2GatedAttentionDecoder(
            len(source.type_to_id),
            len(source.player_to_id),
            len(source.team_to_id),
            len(source.detail_to_id),
            source.details_dimention,
            args.src_embed,
            args.event_size,
            len(target.word_to_id),
            len(target.content_word_to_id),
            args.trg_embed,
            args.hidden,
            start_id,
            end_id,
            class_weight,
            args.loss_weight,
            args.disc_loss,
            args.loss_func,
            args.mlp_layers,
            args.max_length,
            args.dropout,
            IGNORE_LABEL,
            source.id_to_player,
            home_player_tag,
            away_player_tag,
            source.id_to_team,
            home_team_tag,
            away_team_tag,
            target.player_to_id,
            target.players,
            reverse_decode=reverse_decode)

    model.keyword_ids = [
        target.word_to_id['save'], target.word_to_id['block'],
        target.word_to_id['chance'], target.word_to_id['shot'],
        target.word_to_id['clearance'], target.word_to_id['kick'],
        target.word_to_id['ball'], target.word_to_id['blocked'],
        target.word_to_id['denied']
    ]
    model.id_to_word = target.id_to_word
    if args.numbering:
        model.player_id = target.player_id
        model.team_id = target.team_id

    if args.gpu is not None:
        model.use_gpu(args.gpu)
    opt = optimizers.Adam(args.lr)
    opt.setup(model)
    if args.clipping > 0:
        opt.add_hook(GradientClipping(args.clipping))
    if args.decay > 0:
        opt.add_hook(WeightDecay(args.decay))

    N = len(train.source)
    batch_size = args.batch
    order_provider = OrderProvider(Sampling.get_random_order(N))
    src_train_iter = SequentialIterator(train.source,
                                        batch_size,
                                        order_provider,
                                        args.event_size,
                                        source.fillvalue,
                                        gpu=args.gpu)
    if 'disc' in args.net:
        trg_train_iter = TextAndLabelIterator(train.target,
                                              batch_size,
                                              order_provider,
                                              args.sentence_size,
                                              IGNORE_LABEL,
                                              gpu=args.gpu)
    else:
        trg_train_iter = SequentialIterator(train.target,
                                            batch_size,
                                            order_provider,
                                            args.sentence_size,
                                            IGNORE_LABEL,
                                            gpu=args.gpu)
    src_dev_iter = SequentialIterator(dev.source,
                                      batch_size,
                                      None,
                                      args.event_size,
                                      source.fillvalue,
                                      gpu=args.gpu)
    trg_dev_iter = Iterator(dev.target,
                            batch_size,
                            wrapper=EndTokenIdRemoval(end_id),
                            gpu=None)
    src_test_iter = SequentialIterator(test.source,
                                       batch_size,
                                       None,
                                       args.event_size,
                                       source.fillvalue,
                                       gpu=args.gpu)
    src_test20_iter = SequentialIterator(test20.source,
                                         batch_size,
                                         None,
                                         args.event_size,
                                         source.fillvalue,
                                         gpu=args.gpu)
    src_test15_iter = SequentialIterator(test15.source,
                                         batch_size,
                                         None,
                                         args.event_size,
                                         source.fillvalue,
                                         gpu=args.gpu)
    src_test10_iter = SequentialIterator(test10.source,
                                         batch_size,
                                         None,
                                         args.event_size,
                                         source.fillvalue,
                                         gpu=args.gpu)
    src_train2_iter = SequentialIterator(train2.source,
                                         batch_size,
                                         None,
                                         args.event_size,
                                         source.fillvalue,
                                         gpu=args.gpu)
    trg_train2_iter = Iterator(train2.target,
                               batch_size,
                               wrapper=EndTokenIdRemoval(end_id),
                               gpu=None)
    trg_test_iter = Iterator(test.target,
                             batch_size,
                             wrapper=EndTokenIdRemoval(end_id),
                             gpu=None)
    trg_test20_iter = Iterator(test20.target,
                               batch_size,
                               wrapper=EndTokenIdRemoval(end_id),
                               gpu=None)
    trg_test15_iter = Iterator(test15.target,
                               batch_size,
                               wrapper=EndTokenIdRemoval(end_id),
                               gpu=None)
    trg_test10_iter = Iterator(test10.target,
                               batch_size,
                               wrapper=EndTokenIdRemoval(end_id),
                               gpu=None)
    if 'disc' in args.net:
        trainer = Seq2SeqWithLabelTrainer(
            model, opt, src_train_iter, trg_train_iter, src_dev_iter,
            trg_dev_iter, order_provider, evaluate_bleu_and_accuracy,
            args.epoch, save_path, args.eval_step, src_train2_iter,
            trg_train2_iter)
    else:
        trainer = Seq2SeqTrainer(model, opt, src_train_iter, trg_train_iter,
                                 src_dev_iter, trg_dev_iter, order_provider,
                                 evaluate_bleu, args.epoch, save_path,
                                 args.eval_step, src_train2_iter,
                                 trg_train2_iter)

    trainer.run()

    # load best model
    model.load_model(os.path.join(save_path, 'best.model'))
    if 'disc' in args.net:
        bleu_score_dev, _, _ = evaluate_bleu_and_accuracy(
            model, src_dev_iter, trg_dev_iter)
        bleu_score, _, _ = evaluate_bleu_and_accuracy(model, src_test_iter,
                                                      trg_test_iter)
        bleu_score20, _, hypotheses = evaluate_bleu_and_accuracy(
            model, src_test20_iter, trg_test20_iter)
        bleu_score15, _, _ = evaluate_bleu_and_accuracy(
            model, src_test15_iter, trg_test15_iter)
        bleu_score10, _, _ = evaluate_bleu_and_accuracy(
            model, src_test10_iter, trg_test10_iter)
    else:
        bleu_score_dev, _ = evaluate_bleu(model, src_dev_iter, trg_dev_iter)
        bleu_score, _ = evaluate_bleu(model, src_test_iter, trg_test_iter)
        bleu_score20, hypotheses = evaluate_bleu(model, src_test20_iter,
                                                 trg_test20_iter)
        bleu_score15, _ = evaluate_bleu(model, src_test15_iter,
                                        trg_test15_iter)
        bleu_score10, _ = evaluate_bleu(model, src_test10_iter,
                                        trg_test10_iter)
    TextFile(os.path.join(save_path, 'hypotheses.txt'),
             [' '.join(ys) for ys in trainer.hypotheses]).save()
    print('dev score: {}'.format(bleu_score_dev))
    print('test score: {}'.format(bleu_score))
    print('test score20: {}'.format(bleu_score20))
    print('test score15: {}'.format(bleu_score15))
    print('test score10: {}'.format(bleu_score10))

    # saving fields
    pickle_dump(os.path.join(save_path, 'source.pkl'), source)
    pickle_dump(os.path.join(save_path, 'target.pkl'), target)
    pickle_dump(os.path.join(save_path, 'target_test.pkl'), target_test)
Example #11
def main():
    model_cfgs = {
        'detnas_small_coco': {
            'class': DetNASSmallCOCO,
            'score_layer_name': 'fc',
            'kwargs': {
                #'n_class': 1000
            }
        },
    }
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')
    parser.add_argument('--trial', action='store_true')
    parser.add_argument('--gpu', type=int, default=0)
    parser.add_argument(
        '--model',
        '-m',
        choices=model_cfgs.keys(),
        default='detnas_small_coco',
        help='Convnet models')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument(
        '--batchsize', type=int, help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float)
    parser.add_argument('--weight_decay', type=float)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int)
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    label_names = directory_parsing_label_names(args.train)

    model_cfg = model_cfgs[args.model]
    extractor = model_cfg['class'](
        n_class=len(label_names), **model_cfg['kwargs'])
    extractor.pick = model_cfg['score_layer_name']
    model = Classifier(extractor)

    train_data = DirectoryParsingLabelDataset(args.train)
    val_data = DirectoryParsingLabelDataset(args.val)
    train_data = TransformDataset(train_data, TrainTransform(extractor.mean))
    val_data = TransformDataset(val_data, ValTransform(extractor.mean))
    print('finished loading dataset')

    train_indices = np.arange(len(train_data)//(100 if args.trial else 1))
    val_indices = np.arange(len(val_data))


    """
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]
    """
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(
        val_data,
        args.batchsize,
        repeat=False,
        shuffle=False,
        n_processes=args.loaderjob)

    optimizer = CorrectedMomentumSGD(lr=args.lr, momentum=args.momentum)
    optimizer.setup(model)
    for param in model.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    if args.gpu != -1:
        model.to_gpu(args.gpu)

    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=args.gpu)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    trainer.extend(LinearShift('lr', (args.lr, 0.0),
                   (0, len(train_indices) / args.batchsize)))
    evaluator = extensions.Evaluator(val_iter, model)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    trainer.extend(
        chainer.training.extensions.observe_lr(), trigger=log_interval)
    trainer.extend(
        extensions.snapshot_object(extractor,
                                   'snapshot_model_{.updater.epoch}.npz'),
        trigger=(args.epoch, 'epoch'))
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(
        extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
            'validation/main/loss', 'main/accuracy',
            'validation/main/accuracy'
        ]),
        trigger=print_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Example #12
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--batchsize', type=int, default=8)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--iteration', type=int, default=10000)
    parser.add_argument('--interval', type=int, default=1000)
    parser.add_argument('--resume')
    args = parser.parse_args()

    label_names = pose_bbox_label_names
    BboxDataset = PoseBboxDataset

    model = SSD300(n_fg_class=len(label_names),
                   pretrained_model='./models/imagenet.npz')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(BboxDataset(split='trainval'),
                             Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

    test = BboxDataset(split='test')
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # initial lr is set to 2e-5 by ExponentialShift (see the init argument below)
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (args.iteration, 'iteration'),
                               args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=2e-5),
                   trigger=triggers.ManualScheduleTrigger(
                       [int(args.iteration * 0.8),
                        int(args.iteration * 0.9)], 'iteration'))

    trainer.extend(DetectionEvaluator(test_iter,
                                      model,
                                      use_07_metric=True,
                                      label_names=label_names),
                   trigger=(args.interval, 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot_object(
        model, 'pose_iter_{.updater.iteration}.npz'),
                   trigger=(args.iteration // 2, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
Example #13
                    args.label_dim_ft,
                    1,
                    1,
                    0,
                    init_weights=None,
                    pool=None,
                    nobias=False))

print 'Setup optimizer'
opt = SetupOptimizer(model)

# Use lower learning rate for pretrained parts
for name, param in opt.target.namedparams():
    if name.startswith('/predictor/'):
        param.update_rule.hyperparam.lr = args.optimizer['lr_pretrained']
opt.add_hook(WeightDecay(0.0005))
# opt.add_hook(GradientClipping(2.0))

# Resume training from a checkpoint
if args.checkpoint > 0:
    print 'Resume training from checkpoint'
    # Load model weights
    model = ResumeFromCheckpoint(
        '%s/checkpoints/%s_%s_iter_%d.chainermodel' %
        (args.project_folder, args.dataset, args.train_set[6:],
         args.checkpoint), model)
    # Load optimizer status
    serializers.load_npz(
        '%s/checkpoints/%s_%s_iter_%d.chaineropt' %
        (args.project_folder, args.dataset, args.train_set[6:],
         args.checkpoint), opt)
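
Example #13 lowers the learning rate of the pretrained part of the network by editing each parameter's update-rule hyperparameters after the optimizer is set up. A self-contained sketch of the same idea, with made-up link names and rates:

import chainer
from chainer import optimizers

class Net(chainer.Chain):
    def __init__(self):
        super(Net, self).__init__()
        with self.init_scope():
            self.predictor = chainer.links.Linear(8, 4)  # pretend this part is pretrained
            self.head = chainer.links.Linear(4, 2)

net = Net()
opt = optimizers.MomentumSGD(lr=0.1)
opt.setup(net)
for name, param in opt.target.namedparams():
    if name.startswith('/predictor/'):
        param.update_rule.hyperparam.lr = 0.01  # smaller lr for the pretrained layers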
Example #14
def train(args):
    model = FastBiaffineLSTMParser(args.model, args.word_emb_size,
                                   args.afix_emb_size, args.nlayers,
                                   args.hidden_dim, args.dep_dim,
                                   args.dropout_ratio)
    with open(args.model + "/params", "w") as f:
        log(args, f)

    if args.initmodel:
        print('Load model from', args.initmodel)
        chainer.serializers.load_npz(args.initmodel, model)

    if args.pretrained:
        print('Load pretrained word embeddings from', args.pretrained)
        model.load_pretrained_embeddings(args.pretrained)

    if args.gpu >= 0:
        chainer.cuda.get_device(args.gpu).use()
        model.to_gpu()

    if args.tritrain is not None:
        train = LSTMParserTriTrainDataset(args.model, args.train,
                                          args.tritrain, args.tri_weight)
    else:
        train = LSTMParserDataset(args.model, args.train)

    train_iter = chainer.iterators.SerialIterator(train, args.batchsize)
    val = LSTMParserDataset(args.model, args.val)
    val_iter = chainer.iterators.SerialIterator(val,
                                                args.batchsize,
                                                repeat=False,
                                                shuffle=False)
    optimizer = chainer.optimizers.Adam(beta2=0.9)
    # optimizer = chainer.optimizers.MomentumSGD(momentum=0.7)
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(2e-6))
    optimizer.add_hook(GradientClipping(15.))
    updater = MyUpdater(train_iter,
                        optimizer,
                        device=args.gpu,
                        converter=converter)
    trainer = training.Trainer(updater, (args.epoch, 'epoch'), args.model)

    val_interval = 1000, 'iteration'
    log_interval = 200, 'iteration'

    eval_model = model.copy()
    eval_model.train = False

    trainer.extend(extensions.ExponentialShift("eps",
                                               .75,
                                               init=2e-3,
                                               optimizer=optimizer),
                   trigger=(2500, 'iteration'))
    trainer.extend(MyEvaluator(val_iter,
                               eval_model,
                               converter,
                               device=args.gpu),
                   trigger=val_interval)
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=val_interval)
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(observation_key="eps"),
                   trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'main/tagging_accuracy', 'main/tagging_loss',
        'main/parsing_accuracy', 'main/parsing_loss',
        'validation/main/tagging_accuracy', 'validation/main/parsing_accuracy',
        'eps'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Example #15
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')
    args = parser.parse_args()

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=len(voc_detection_label_names),
                       pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=len(voc_detection_label_names),
                       pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    train = TransformDataset(
        ConcatenatedDataset(VOCDetectionDataset(year='2007', split='trainval'),
                            VOCDetectionDataset(year='2012',
                                                split='trainval')),
        Transform(model.coder, model.insize, model.mean))
    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test = VOCDetectionDataset(year='2007',
                               split='test',
                               use_difficult=True,
                               return_difficult=True)
    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)
    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([80000, 100000],
                                                          'iteration'))

    trainer.extend(DetectionVOCEvaluator(
        test_iter,
        model,
        use_07_metric=True,
        label_names=voc_detection_label_names),
                   trigger=(10000, 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport([
        'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
        'main/loss/conf', 'validation/main/map'
    ]),
                   trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
                   trigger=(120000, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
Example #16
    def fit(self, X, y, x_test=None, data_name=None, n_epoch=1000, batch_size=20, freq_print_loss=10, freq_plot=50,
            n_samples=100):
        """
        Start fitting the model parameters.
        :param np.ndarray X:
        :param np.ndarray y:
        :param np.ndarray | None x_test:
        :param str | None data_name:
        :param int n_epoch:
        :param int batch_size:
        :param int freq_print_loss:
        :param int freq_plot:
        :param int n_samples: number of samples drawn from the posterior when plotting it.
        :return: self
        """
        conditions = self.conditions
        output_dir = "data/{data_name}/{conditions}".format(**locals())
        # create the output directory for the figures
        if os.path.exists(output_dir) is False:
            os.makedirs(output_dir)

        X, y = self.preprocess(X, y)
        if x_test is not None:
            x_test = self.x_transformer.transform(x_test)

        N = X.shape[0]

        # convert arrays to chainer Variables
        X = Variable(preprocess_array_format(X))
        y = Variable(preprocess_array_format(y))
        if x_test is not None:
            x_test = Variable(preprocess_array_format(x_test))

        self.optimizer.setup(self.model)
        self.optimizer.add_hook(WeightDecay(self.weight_decay))
        list_loss = []

        for e in range(1, n_epoch + 1):
            perm = np.random.permutation(N)
            for i in range(0, N, batch_size):
                idx = perm[i: i + batch_size]
                _x = X[idx]
                _y = y[idx]
                self.model.zerograds()
                loss = F.mean_squared_error(self.model(_x, apply_input=self.apply_input), _y)
                loss.backward()
                self.optimizer.update()

            l = F.mean_squared_error(self.model(X, False, False), y).data
            if e % freq_print_loss == 0:
                print("epoch: {e}\tloss:{l}".format(**locals()))

            if e % freq_plot == 0:
                fig, ax = self.plot_posterior(x_test, X.data, y.data, n_samples=n_samples)
                ax.set_title("epoch:{0:04d}".format(e))
                fig.tight_layout()
                file_path = os.path.join(output_dir, "epoch={e:04d}.png".format(**locals()))
                fig.savefig(file_path, dpi=150)
                plt.close("all")
            list_loss.append([e, l])

        save_logloss(list_loss, self.model.__str__())
Example #17
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('--model',
                        choices=('ssd300', 'ssd512'),
                        default='ssd300')
    parser.add_argument('--batchsize', type=int, default=32)
    parser.add_argument('--labelnum', type=int, default=50)
    parser.add_argument('--gpu', type=int, default=-1)
    parser.add_argument('--out', default='result')
    parser.add_argument('--resume')

    parser.add_argument('--image_label',
                        '-il',
                        help='Path to training image-label list file')
    parser.add_argument('--bbox', help='Path to training bbox list file')
    parser.add_argument('--image_label_test',
                        '-ilt',
                        help='Path to training image-label list file')
    parser.add_argument('--bbox_test', help='Path to training bbox list file')

    parser.add_argument('--image_root',
                        '-TR',
                        default='.',
                        help='Root directory path of image files')

    args = parser.parse_args()

    comm = chainermn.create_communicator('naive')
    if comm.mpi_comm.rank == 0:
        print('==========================================')
        print('Num process (COMM_WORLD): {}'.format(MPI.COMM_WORLD.Get_size()))

    if args.model == 'ssd300':
        model = SSD300(n_fg_class=args.labelnum, pretrained_model='imagenet')
    elif args.model == 'ssd512':
        model = SSD512(n_fg_class=args.labelnum, pretrained_model='imagenet')

    model.use_preset('evaluate')
    train_chain = MultiboxTrainChain(model)
    if args.gpu >= 0:
        chainer.cuda.get_device_from_id(args.gpu).use()
        model.to_gpu()

    from test_datasets import DeepFashionBboxDataset

    if comm.rank == 0:
        train = DeepFashionBboxDataset(args.bbox, args.image_label,
                                       args.image_root)
        test = DeepFashionBboxDataset(args.bbox_test, args.image_label_test,
                                      args.image_root)

        train = TransformDataset(
            train, Transform(model.coder, model.insize, model.mean))
    else:
        train, test = None, None
    train = chainermn.scatter_dataset(train, comm, shuffle=True)
    test = chainermn.scatter_dataset(test, comm, shuffle=True)

    train_iter = chainer.iterators.MultiprocessIterator(train, args.batchsize)

    test_iter = chainer.iterators.SerialIterator(test,
                                                 args.batchsize,
                                                 repeat=False,
                                                 shuffle=False)

    # initial lr is set to 1e-3 by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer = chainermn.create_multi_node_optimizer(optimizer, comm)
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=args.gpu)
    trainer = training.Trainer(updater, (120000, 'iteration'), args.out)

    checkpoint_interval = (1000, 'iteration')

    checkpointer = chainermn.create_multi_node_checkpointer(
        name='imagenet-example', comm=comm)
    checkpointer.maybe_load(trainer, optimizer)
    trainer.extend(checkpointer, trigger=checkpoint_interval)

    trainer.extend(extensions.ExponentialShift('lr', 0.1, init=1e-3),
                   trigger=triggers.ManualScheduleTrigger([80000, 100000],
                                                          'iteration'))

    evaluator = DetectionVOCEvaluator(test_iter,
                                      model,
                                      use_07_metric=True,
                                      label_names=voc_bbox_label_names)

    evaluator = chainermn.create_multi_node_evaluator(evaluator, comm)
    trainer.extend(evaluator, trigger=(10000, 'iteration'))

    if comm.rank == 0:
        log_interval = 10, 'iteration'
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.observe_lr(), trigger=log_interval)
        trainer.extend(extensions.PrintReport([
            'epoch', 'iteration', 'lr', 'main/loss', 'main/loss/loc',
            'main/loss/conf', 'validation/main/map'
        ]),
                       trigger=log_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

        trainer.extend(extensions.snapshot(), trigger=(10000, 'iteration'))
        trainer.extend(extensions.snapshot_object(
            model, 'model_iter_{.updater.iteration}'),
                       trigger=(120000, 'iteration'))

    if args.resume:
        serializers.load_npz(args.resume, trainer)

    trainer.run()
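The per-parameter hook pattern above (GradientScaling(2) on biases, WeightDecay on everything else) recurs in most of the SSD examples in this file. A minimal standalone sketch of just that pattern, assuming Chainer v4+ and ChainerCV for GradientScaling; the convolution is only a stand-in for the real train chain:

import chainer
import chainer.links as L
from chainer.optimizer_hooks import WeightDecay
from chainercv.links.model.ssd import GradientScaling

model = L.Convolution2D(3, 16, ksize=3)  # stand-in for MultiboxTrainChain
optimizer = chainer.optimizers.MomentumSGD(lr=1e-3)
optimizer.setup(model)
for param in model.params():
    if param.name == 'b':
        # biases: gradients doubled, no weight decay
        param.update_rule.add_hook(GradientScaling(2))
    else:
        # weights: per-parameter L2 regularization
        param.update_rule.add_hook(WeightDecay(0.0005))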
Example no. 18
def train(train_data, val_data, label_names,
          iteration, lr, step_points,
          batchsize, gpu, out, val_iteration,
          log_iteration, loaderjob,
          resume):

    model = SSD300(n_fg_class=len(label_names), pretrained_model='imagenet')
    model.use_preset('evaluate')

    train_chain = MultiboxTrainChain(model)  # alpha and k?

    if gpu >= 0:
        chainer.cuda.get_device_from_id(gpu).use()
        model.to_gpu()

    train_data = TransformDataset(
        train_data,
        Transform(model.coder, model.insize, model.mean))
    if loaderjob <= 0:
        train_iter = chainer.iterators.SerialIterator(train_data, batchsize)
    else:
        train_iter = chainer.iterators.MultiprocessIterator(train_data, batchsize)  # , n_processes=min((loaderjob, batchsize)))

    val_iter = chainer.iterators.SerialIterator(val_data, batchsize, repeat=False, shuffle=False)  # think about repeat

    # initial lr is set to 1e-4 (default run_train) by ExponentialShift
    optimizer = chainer.optimizers.MomentumSGD()
    optimizer.setup(train_chain)
    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    updater = training.StandardUpdater(train_iter, optimizer, device=gpu)
    trainer = training.Trainer(updater, (iteration, 'iteration'), out)
    trainer.extend(
        extensions.ExponentialShift('lr', 0.1, init=lr),
        trigger=triggers.ManualScheduleTrigger(step_points, 'iteration'))

    val_interval = (val_iteration, 'iteration')
    trainer.extend(
        DetectionVOCEvaluator(
            val_iter, model, use_07_metric=True,
            label_names=label_names),
        trigger=val_interval)

    ###
    # Logging
    log_interval = log_iteration, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'lr',
         'main/loss', 'main/loss/loc', 'main/loss/conf',
         'validation/main/map']),
        trigger=log_interval)
    trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.extend(extensions.snapshot(), trigger=val_interval)
    trainer.extend(
        extensions.snapshot_object(model, 'model_iter_{.updater.iteration}'),
        trigger=val_interval)
    ###

    if resume:
        serializers.load_npz(resume, trainer)

    trainer.run()
Example no. 19
def main():
    model_cfgs = {
        'resnet50': {'class': ResNet50, 'score_layer_name': 'fc6',
                     'kwargs': {'arch': 'fb'}},
        'resnet101': {'class': ResNet101, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}},
        'resnet152': {'class': ResNet152, 'score_layer_name': 'fc6',
                      'kwargs': {'arch': 'fb'}}
    }
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')

    parser.add_argument('--export', type=str, default=None,
                        help='Export the model to ONNX')
    parser.add_argument('--compile', type=str, default=None,
                        help='Compile the model')
    parser.add_argument('--computation_order', type=str, default=None,
                        help='Computation order in backpropagation')

    parser.add_argument('--model',
                        '-m', choices=model_cfgs.keys(), default='resnet50',
                        help='Convnet models')
    parser.add_argument('--communicator', type=str,
                        default='pure_nccl', help='Type of communicator')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument('--batchsize', type=int, default=32,
                        help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight-decay', type=float, default=0.0001)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int, default=90)
    parser.add_argument('--iterations', '-I', type=int, default=None,
                        help='Number of iterations to train')
    parser.add_argument('--no_use_fixed_batch_dataset',
                        dest='use_fixed_batch_dataset',
                        action='store_false',
                        help='Disable the use of FixedBatchDataset')
    parser.add_argument('--compiler-log', action='store_true',
                        help='Enables compile-time logging')
    parser.add_argument('--trace', action='store_true',
                        help='Enables runtime tracing')
    parser.add_argument('--verbose', action='store_true',
                        help='Enables runtime verbose log')
    parser.add_argument('--skip_runtime_type_check', action='store_true',
                        help='Skip runtime type check')
    parser.add_argument('--dump_memory_usage', type=int, default=0,
                        help='Dump memory usage (0-2)')
    parser.add_argument('--quiet_period', type=int, default=0,
                        help='Quiet period after runtime report')
    parser.add_argument('--overwrite_batchsize', action='store_true',
                        help='Overwrite batch size')
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank

    if args.lr is not None:
        lr = args.lr
    else:
        lr = 0.1 * (args.batchsize * comm.size) / 256
        if comm.rank == 0:
            print('lr={}: lr is selected based on the linear '
                  'scaling rule'.format(lr))

    label_names = directory_parsing_label_names(args.train)

    model_cfg = model_cfgs[args.model]
    extractor = model_cfg['class'](
        n_class=len(label_names), **model_cfg['kwargs'])
    extractor.pick = model_cfg['score_layer_name']

    # Following https://arxiv.org/pdf/1706.02677.pdf,
    # the gamma of the last BN of each resblock is initialized by zeros.
    for l in extractor.links():
        if isinstance(l, Bottleneck):
            l.conv3.bn.gamma.data[:] = 0

    if args.export is not None:
        chainer_compiler.use_unified_memory_allocator()
        extractor.to_device(device)
        x = extractor.xp.zeros((args.batchsize, 3, 224, 224)).astype('f')
        chainer_compiler.export(extractor, [x], args.export)
        return

    if args.compile is not None:
        print('run compiled model')
        chainer_compiler.use_chainerx_shared_allocator()
        extractor.to_device(device)
        # init params
        with chainer.using_config('enable_backprop', False),\
                chainer.using_config('train', False):
            x = extractor.xp.zeros((1, 3, 224, 224)).astype('f')
            extractor(x)

        compiler_kwargs = {}
        if args.compiler_log:
            compiler_kwargs['compiler_log'] = True
        runtime_kwargs = {}
        if args.trace:
            runtime_kwargs['trace'] = True
        if args.verbose:
            runtime_kwargs['verbose'] = True
        if args.skip_runtime_type_check:
            runtime_kwargs['check_types'] = False
        if args.dump_memory_usage >= 1:
            runtime_kwargs['dump_memory_usage'] = args.dump_memory_usage
            free, total = cupy.cuda.runtime.memGetInfo()
            used = total - free
            runtime_kwargs['base_memory_usage'] = used

        onnx_filename = args.compile
        if args.overwrite_batchsize:
            new_onnx_filename = ('/tmp/overwrite_batchsize_' +
                                 os.path.basename(onnx_filename))
            new_input_types = [
                input_rewriter.Type(shape=(args.batchsize, 3, 224, 224))
            ]
            input_rewriter.rewrite_onnx_file(onnx_filename,
                                             new_onnx_filename,
                                             new_input_types)
            onnx_filename = new_onnx_filename

        extractor_cc = chainer_compiler.compile_onnx(
            extractor,
            onnx_filename,
            'onnx_chainer',
            computation_order=args.computation_order,
            compiler_kwargs=compiler_kwargs,
            runtime_kwargs=runtime_kwargs,
            quiet_period=args.quiet_period)
        model = Classifier(extractor_cc)
    else:
        print('run vanilla chainer model')
        model = Classifier(extractor)

    train_data = DirectoryParsingLabelDataset(args.train)
    val_data = DirectoryParsingLabelDataset(args.val)
    train_data = TransformDataset(
        train_data, ('img', 'label'), TrainTransform(extractor.mean))
    val_data = TransformDataset(
        val_data, ('img', 'label'), ValTransform(extractor.mean))
    print('finished loading dataset')

    if comm.rank == 0:
        train_indices = np.arange(len(train_data))
        val_indices = np.arange(len(val_data))
    else:
        train_indices = None
        val_indices = None

    train_indices = chainermn.scatter_dataset(
        train_indices, comm, shuffle=True)
    val_indices = chainermn.scatter_dataset(val_indices, comm, shuffle=True)
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]
    if args.use_fixed_batch_dataset:
        train_data = FixedBatchDataset(train_data, args.batchsize)
        val_data = FixedBatchDataset(val_data, args.batchsize)
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(
        val_data, args.batchsize,
        repeat=False, shuffle=False, n_processes=args.loaderjob)

    optimizer = chainermn.create_multi_node_optimizer(
        CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm)
    optimizer.setup(model)
    for param in model.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    if device >= 0:
        chainer.cuda.get_device(device).use()
        model.to_gpu()

    updater = chainer.training.StandardUpdater(
        train_iter, optimizer, device=device)

    if args.iterations:
        stop_trigger = (args.iterations, 'iteration')
    else:
        stop_trigger = (args.epoch, 'epoch')
    trainer = training.Trainer(
        updater, stop_trigger, out=args.out)

    @make_shift('lr')
    def warmup_and_exponential_shift(trainer):
        epoch = trainer.updater.epoch_detail
        warmup_epoch = 5
        if epoch < warmup_epoch:
            if lr > 0.1:
                warmup_rate = 0.1 / lr
                rate = warmup_rate \
                    + (1 - warmup_rate) * epoch / warmup_epoch
            else:
                rate = 1
        elif epoch < 30:
            rate = 1
        elif epoch < 60:
            rate = 0.1
        elif epoch < 80:
            rate = 0.01
        else:
            rate = 0.001
        return rate * lr

    trainer.extend(warmup_and_exponential_shift)
    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(val_iter, model, device=device), comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(
            extensions.snapshot_object(
                extractor, 'snapshot_model_{.updater.epoch}.npz'),
            trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport(
            ['iteration', 'epoch', 'elapsed_time', 'lr',
             'main/loss', 'validation/main/loss',
             'main/accuracy', 'validation/main/accuracy']
        ), trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
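The ChainerMN skeleton used here (and again in later examples) is: create a communicator, take the device from the intra-node rank, build the dataset on rank 0 and scatter it, then wrap the optimizer so gradients are all-reduced. A minimal sketch of that skeleton with toy data; 'naive' avoids the NCCL/GPU requirement but still has to be launched under MPI (e.g. mpiexec):

import numpy as np
import chainer
import chainer.links as L
import chainermn

comm = chainermn.create_communicator('naive')  # 'pure_nccl' on GPU clusters
device = comm.intra_rank

if comm.rank == 0:
    dataset = [(np.random.rand(3).astype(np.float32), np.int32(i % 2))
               for i in range(100)]
else:
    dataset = None
dataset = chainermn.scatter_dataset(dataset, comm, shuffle=True)

model = L.Classifier(L.Linear(3, 2))
optimizer = chainermn.create_multi_node_optimizer(
    chainer.optimizers.MomentumSGD(lr=0.01), comm)
optimizer.setup(model)
train_iter = chainer.iterators.SerialIterator(dataset, batch_size=10)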
Example no. 20
print('Prepare Dataset')
datasets = DataChef.PrepData(args)

print('Initialize Model')
predictor = Models.InceptionV3(args)
classifiers = [Models.Classifier(label_dim) for label_dim in args.label_dim]
model = Models.InceptionV3Classifier(predictor, classifiers, args)

print('Setup optimizer')
opt = SetupOptimizer(model)

# Use lower learning rate for pretrained parts
for name, param in opt.target.namedparams():
    if name.startswith('/predictor/'):
        param.update_rule.hyperparam.lr = args.optimizer['lr_pretrained']
opt.add_hook(WeightDecay(args.optimizer['weightdecay']))
opt.add_hook(GradientClipping(args.optimizer['gradientclipping']))

# Resume training from a checkpoint
if args.checkpoint > 0:
    print('Resume training from checkpoint')
    # Load model weights
    ResumeFromCheckpoint(
        '%s/checkpoints/%s_iter_%d_%s.chainermodel' %
        (args.project_folder, args.dataset, args.checkpoint, args.suffix),
        model)
    # Load optimizer status
    serializers.load_npz(
        '%s/checkpoints/%s_iter_%d_%s.chaineropt' %
        (args.project_folder, args.dataset, args.checkpoint, args.suffix), opt)
    # Adjust the learning rate
Example no. 21
def main(config):
    opts = config()

    comm = chainermn.create_communicator(opts.communicator)
    device = comm.intra_rank

    backborn_cfg = opts.backborn_cfg

    df = pd.read_csv(opts.path_data + opts.train_df).sample(frac=1)

    ################### pseudo labeling #########################
    if opts.pseudo_labeling_path is not None:
        test_df = pd.read_csv(opts.path_data + opts.test_df)
        labels = np.load(opts.pseudo_labeling_path, allow_pickle=False)
        labels = np.concatenate((labels, labels))
        count = 0
        valid_array = []
        valid_sirna = []
        for i, label in enumerate(labels):
            if label.max() > 0.0013:
                count = count + 1
                valid_array.append(i)
                valid_sirna.append(label.argmax())
        print(count)
        pseudo_df = test_df.iloc[valid_array, :]
        pseudo_df["sirna"] = valid_sirna
        df = pd.concat([df, pseudo_df]).sample(frac=1)
    ################### pseudo labeling #########################

    for i, (train_df, valid_df) in enumerate(
            stratified_groups_kfold(df,
                                    target=opts.fold_target,
                                    n_splits=opts.fold)):
        if comm.rank == 0:
            train_df.to_csv(
                opts.path_data + 'train' + '_fold' + str(i) + '.csv',
                columns=[
                    'id_code', 'experiment', 'plate', 'well', 'sirna',
                    'filename', 'cell', 'site'
                ])
            valid_df.to_csv(
                opts.path_data + 'valid' + '_fold' + str(i) + '.csv',
                columns=[
                    'id_code', 'experiment', 'plate', 'well', 'sirna',
                    'filename', 'cell', 'site'
                ])
            print("Save a csvfile of fold_" + str(i))
        dataset = opts.dataset
        train_dataset = dataset(train_df, opts.path_data)
        val_dataset = dataset(valid_df, opts.path_data)

        backborn = chcv2_get_model(
            backborn_cfg['name'],
            pretrained=backborn_cfg['pretrain'],
            in_size=opts.input_shape)[backborn_cfg['layer']]

        model = opts.model(backborn=backborn).copy(mode='init')
        if device >= 0:
            chainer.cuda.get_device(device).use()
            model.to_gpu()

        mean = opts.mean

        train_data = TransformDataset(train_dataset, opts.train_transform)
        val_data = TransformDataset(val_dataset, opts.valid_trainsform)

        if comm.rank == 0:
            train_indices = train_data
            val_indices = val_data
        else:
            train_indices = None
            val_indices = None

        train_data = chainermn.scatter_dataset(train_indices,
                                               comm,
                                               shuffle=True)
        val_data = chainermn.scatter_dataset(val_indices, comm, shuffle=False)
        train_iter = chainer.iterators.MultiprocessIterator(
            train_data,
            opts.batchsize,
            shuffle=True,
            n_processes=opts.loaderjob)
        val_iter = chainer.iterators.MultiprocessIterator(
            val_data,
            opts.batchsize,
            repeat=False,
            shuffle=False,
            n_processes=opts.loaderjob)
        print('finished loading dataset')

        if device >= 0:
            chainer.cuda.get_device(device).use()
            model.to_gpu()
        if opts.optimizer == "CorrectedMomentumSGD":
            optimizer = chainermn.create_multi_node_optimizer(
                CorrectedMomentumSGD(lr=opts.lr), comm)
        elif opts.optimizer == "NesterovAG":
            optimizer = chainermn.create_multi_node_optimizer(
                NesterovAG(lr=opts.lr), comm)
        else:
            optimizer = chainermn.create_multi_node_optimizer(
                Adam(alpha=opts.alpha,
                     weight_decay_rate=opts.weight_decay,
                     adabound=True,
                     final_lr=0.5), comm)

        optimizer.setup(model)
        if opts.optimizer == "CorrectedMomentumSGD":
            for param in model.params():
                if param.name not in ('beta', 'gamma'):
                    param.update_rule.add_hook(WeightDecay(opts.weight_decay))

        if opts.fc_lossfun == 'softmax_cross_entropy':
            fc_lossfun = F.softmax_cross_entropy
        elif opts.fc_lossfun == 'focal_loss':
            if opts.ls:
                focal_loss = FocalLoss(label_smoothing=True)
            else:
                focal_loss = FocalLoss()
            fc_lossfun = focal_loss.loss
        elif opts.fc_lossfun == 'auto_focal_loss':
            if opts.ls:
                focal_loss = AutoFocalLoss(label_smoothing=True)
            else:
                focal_loss = AutoFocalLoss()
            fc_lossfun = focal_loss.loss
        elif opts.fc_lossfun == 'auto_focal_loss_bce':
            if opts.ls:
                focal_loss = AutoFocalLossBCE(label_smoothing=True)
            else:
                focal_loss = AutoFocalLossBCE()
            fc_lossfun = focal_loss.loss
        if opts.metric_lossfun == 'arcface':
            arcface = ArcFace()
            metric_lossfun = arcface.loss
        elif opts.metric_lossfun == 'adacos':
            adacos = AdaCos()
            metric_lossfun = adacos.loss

        updater = opts.updater(train_iter,
                               optimizer,
                               model,
                               device=device,
                               max_epoch=opts.max_epoch,
                               fix_sche=opts.fix_sche,
                               metric_lossfun=metric_lossfun,
                               fc_lossfun=fc_lossfun,
                               metric_w=opts.metric_w,
                               fc_w=opts.fc_w)
        evaluator = chainermn.create_multi_node_evaluator(
            opts.evaluator(val_iter,
                           model,
                           device=device,
                           max_epoch=opts.max_epoch,
                           fix_sche=opts.fix_sche,
                           metric_lossfun=metric_lossfun,
                           fc_lossfun=fc_lossfun,
                           metric_w=opts.metric_w,
                           fc_w=opts.fc_w), comm)

        trainer = training.Trainer(updater, (opts.max_epoch, 'epoch'),
                                   out=opts.out + '_fold' + str(i))

        if opts.optimizer == "CorrectedMomentumSGD":
            trainer.extend(extensions.ExponentialShift('lr', opts.shift_lr),
                           trigger=ManualScheduleTrigger(
                               opts.lr_points, 'epoch'))
        elif opts.optimizer == "NesterovAG":
            trainer.extend(extensions.ExponentialShift('lr', opts.shift_lr),
                           trigger=ManualScheduleTrigger(
                               opts.lr_points, 'epoch'))
        else:
            trainer.extend(extensions.ExponentialShift('alpha', opts.shift_lr),
                           trigger=ManualScheduleTrigger(
                               opts.lr_points, 'epoch'))

        trainer.extend(evaluator, trigger=(int(opts.max_epoch / 10), 'epoch'))
        #         trainer.extend(evaluator, trigger=(int(1), 'epoch'))
        log_interval = 0.1, 'epoch'
        print_interval = 0.1, 'epoch'

        if comm.rank == 0:
            trainer.extend(chainer.training.extensions.observe_lr(),
                           trigger=log_interval)
            trainer.extend(extensions.snapshot_object(
                model, 'snapshot_model' + '_{.updater.epoch}.npz'),
                           trigger=(opts.max_epoch / 10, 'epoch'))
            trainer.extend(extensions.snapshot_object(
                model, 'snapshot_model_f1max.npz'),
                           trigger=chainer.training.triggers.MaxValueTrigger(
                               'validation/main/accuracy',
                               trigger=(opts.max_epoch / 10, 'epoch')))
            trainer.extend(extensions.LogReport(trigger=log_interval))
            trainer.extend(extensions.PrintReport([
                'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
                'main/face_loss', 'main/ce_loss', 'main/accuracy',
                'validation/main/loss', 'validation/main/face_loss',
                'validation/main/ce_loss', 'validation/main/accuracy'
            ]),
                           trigger=print_interval)
            trainer.extend(extensions.ProgressBar(update_interval=10))
        trainer.run()
Example no. 22
def train(train_file,
          test_file,
          embed_file,
          n_epoch=20,
          batch_size=20,
          gpu=-1,
          save=None):

    # Load files
    Log.i('initialize preprocessor with %s' % embed_file)
    processor = Preprocessor(embed_file)
    reader = CorpusReader(processor)
    Log.i('load train dataset from %s' % str(train_file))
    train_dataset = reader.load(train_file, train=True)
    Log.i('load test dataset from %s' % str(test_file))
    test_dataset = reader.load(test_file, train=False)

    hparams = {
        'dropout_ratio': 0.2,
        'adagrad_lr': 0.2,
        'weight_decay': 0.0001,
    }

    Log.v('')
    Log.v("initialize ...")
    Log.v('--------------------------------')
    Log.i('# Minibatch-size: %d' % batch_size)
    Log.i('# epoch: %d' % n_epoch)
    Log.i('# gpu: %d' % gpu)
    Log.i('# hyper-parameters: %s' % str(hparams))
    Log.v('--------------------------------')
    Log.v('')

    # Set up a neural network
    cls = BLSTMCRF if _use_crf else BLSTM
    model = cls(
        embeddings=processor.embeddings,
        n_labels=4,
        dropout=hparams['dropout_ratio'],
        train=True,
    )
    if gpu >= 0:
        cuda.get_device(gpu).use()
        model.to_gpu()
    eval_model = model.copy()
    eval_model.train = False

    # Setup an optimizer
    optimizer = optimizers.AdaGrad(lr=hparams['adagrad_lr'])
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(hparams['weight_decay']))

    def _update(optimizer, loss):
        optimizer.target.zerograds()
        loss.backward()
        optimizer.update()

    def _process(dataset, model):
        size = len(dataset)
        batch_count = 0
        loss = 0.0
        accuracy = 0.0

        p = ProgressBar(min_value=0, max_value=size, fd=sys.stderr).start()
        for i, (xs, ys) in enumerate(
                dataset.batch(batch_size, colwise=True, shuffle=model.train)):
            p.update((batch_size * i) + 1)
            batch_count += 1
            batch_loss, batch_accuracy = model(xs, ys)
            loss += batch_loss.data
            accuracy += batch_accuracy
            if model.train:
                _update(optimizer, batch_loss)

        p.finish()
        Log.i("[%s] epoch %d - #samples: %d, loss: %f, accuracy: %f" %
              ('training' if model.train else 'evaluation', epoch + 1, size,
               loss / batch_count, accuracy / batch_count))

    for epoch in range(n_epoch):
        _process(train_dataset, model)
        _process(test_dataset, eval_model)
        Log.v('-')

    if save is not None:
        Log.i("saving the model to %s ..." % save)
        serializers.save_npz(save, model)
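This example switches to evaluation behaviour through a copied model with a train attribute, which is the old Chainer v1 idiom; later Chainer versions normally switch mode with configuration contexts instead. A small self-contained sketch of the config-based evaluation pass, as an aside:

import numpy as np
import chainer
import chainer.links as L

model = L.Classifier(L.Linear(4, 3))
x = np.random.rand(5, 4).astype(np.float32)
t = np.zeros(5, dtype=np.int32)

# dropout/batch-norm run in test mode and no backprop graph is built
with chainer.using_config('train', False), \
        chainer.using_config('enable_backprop', False):
    eval_loss = model(x, t)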
Example no. 23
chainer.config.autotune = True

"""Creating Iterators for training. The Transform function is used on train_dataset."""

transformed_train_dataset = TransformDataset(train_dataset, Transform(model.coder, model.insize, model.mean))

train_iter = chainer.iterators.MultiprocessIterator(transformed_train_dataset, batchsize)
valid_iter = chainer.iterators.SerialIterator(valid_dataset, batchsize, repeat=False, shuffle=False)

optimizer = chainer.optimizers.MomentumSGD()
optimizer.setup(train_chain)
for param in train_chain.params():
    if param.name == 'b':
        param.update_rule.add_hook(GradientScaling(2))
    else:
        param.update_rule.add_hook(WeightDecay(0.0005))

updater = training.updaters.StandardUpdater(
    train_iter, optimizer, device=gpu_id)

trainer = training.Trainer(
    updater,
    (training_epoch, 'epoch'), out)

trainer.extend(
    extensions.ExponentialShift('lr', lr_decay_rate, init=initial_lr),
    trigger=triggers.ManualScheduleTrigger(lr_decay_timing, 'epoch'))

trainer.extend(
    DetectionVOCEvaluator(
        valid_iter, model, use_07_metric=False,
Example no. 24
def main():
    parser = ArgumentParser()

    parser.add_argument('train_data', help='train data')
    parser.add_argument('train_labels', help='train labels')
    parser.add_argument('--val-data', default=None, help='val data')
    parser.add_argument('--val-labels', default=None, help='val labels')
    parser.add_argument('-b',
                        '--batch-size',
                        type=int,
                        default=5,
                        help='mini-batch size (default=5)')
    parser.add_argument('--beta2',
                        type=float,
                        default=0.999,
                        help='beta2 of Adam (default=0.999)')
    parser.add_argument('-g',
                        '--gpu-id',
                        type=int,
                        default=-1,
                        help='GPU ID (default=-1, indicates CPU)')
    parser.add_argument('--ignore-labels',
                        type=int,
                        default=[],
                        nargs='+',
                        help='labels to ignore (default=[])')
    parser.add_argument('-l',
                        '--learning-rate',
                        type=float,
                        default=0.1,
                        help='learning rate (default=0.1)')
    parser.add_argument('--max-iter',
                        type=int,
                        default=160000,
                        help='train model up to max-iter (default=160000)')
    parser.add_argument(
        '--mean-interval',
        type=int,
        default=1000,
        help='calculate mean of train/loss (and validation loss) ' +
        'every mean-interval iters (default=1000)')
    parser.add_argument('--model',
                        default=None,
                        help='resume to train the model')
    parser.add_argument('--momentum',
                        type=float,
                        default=0.9,
                        help='momentum rate (default=0.9)')
    parser.add_argument('--n-classes',
                        type=int,
                        default=5,
                        help='number of classes (default=5)')
    parser.add_argument('--noise',
                        default='no',
                        help='noise injection method. \'no\', \'patch\', ' +
                        'and \'permutation\' are available (default=\'no\')')
    parser.add_argument('--optim',
                        default='nesterov',
                        help='optimization method. \'sgd\', \'nesterov\', ' +
                        'and \'adam\' are available (default=\'nesterov\')')
    parser.add_argument(
        '-o',
        '--outdir',
        default='./',
        help='trained models and optimizer states are stored in outdir ' +
        '(default=\'./\')')
    parser.add_argument(
        '--queue-maxsize',
        type=int,
        default=10,
        help='maxsize of queues for training and validation (default=10)')
    parser.add_argument(
        '--save-interval',
        type=int,
        default=10000,
        help='save model & optimizer every save-interval iters (default=10000)'
    )
    parser.add_argument(
        '--state',
        default=None,
        help='optimizer state. resume to train the model with the optimizer')
    parser.add_argument('-w',
                        '--weight-decay',
                        type=float,
                        default=1e-4,
                        help='weight decay factor (default=1e-4)')

    args = parser.parse_args()

    print(argv2string(sys.argv) + '\n')
    for arg in dir(args):
        if arg[:1] == '_':
            continue
        print('{} = {}'.format(arg, getattr(args, arg)))
    print()

    if not os.path.isdir(args.outdir):
        os.makedirs(args.outdir)
        print('mkdir ' + args.outdir + '\n')

    model = Model(in_ch=3, out_ch=args.n_classes)
    if args.model is not None:
        S.load_npz(args.model, model)
    loss_func = Loss(model)

    if args.optim.lower() in 'sgd':
        if args.momentum > 0:
            optim = optims.CorrectedMomentumSGD(lr=args.learning_rate,
                                                momentum=args.momentum)
        else:
            optim = optims.SGD(lr=args.learning_rate)
    elif args.optim.lower() in 'nesterovag':
        optim = optims.NesterovAG(lr=args.learning_rate,
                                  momentum=args.momentum)
    elif args.optim.lower() in 'adam':
        optim = optims.Adam(alpha=args.learning_rate,
                            beta1=args.momentum,
                            beta2=args.beta2,
                            weight_decay_rate=args.weight_decay,
                            amsgrad=True)
    else:
        raise ValueError('Please specify an available optimizer name.\n' +
                         'SGD, NesterovAG, and Adam are available.')

    print('{}\n'.format(type(optim)))
    optim.setup(model)

    if args.state is not None:
        S.load_npz(args.state, optim)

    if (args.weight_decay > 0) and not isinstance(optim, optims.Adam):
        optim.add_hook(WeightDecay(args.weight_decay))

    optim.add_hook(GradientClipping(1))

    lr_decay_iter_dict = {
        int(5 * args.max_iter / 8): 0.1,
        int(7 * args.max_iter / 8): 0.1,
    }

    with open(args.train_data, 'r') as f:
        train_data_path_list = [line.strip() for line in f.readlines()]
    with open(args.train_labels, 'r') as f:
        train_labels_path_list = [line.strip() for line in f.readlines()]

    assert len(train_data_path_list) == len(train_labels_path_list)

    if (args.val_data is not None) or (args.val_labels is not None):
        if (args.val_data is not None) and (args.val_labels is not None):
            with open(args.val_data, 'r') as f:
                val_data_path_list = [line.strip() for line in f.readlines()]
            with open(args.val_labels, 'r') as f:
                val_labels_path_list = [line.strip() for line in f.readlines()]
            assert len(val_data_path_list) == len(val_labels_path_list)
        else:
            raise ValueError('Either val_data or val_labels is not specified.')

    train_queue = mp.Queue(maxsize=args.queue_maxsize)
    train_generator = BatchGenerator(args.batch_size,
                                     train_data_path_list,
                                     train_labels_path_list,
                                     train_queue,
                                     train=True,
                                     noise_injection=args.noise,
                                     out_height=512,
                                     out_width=512,
                                     max_height=1216,
                                     max_width=1216,
                                     min_height=832,
                                     min_width=832)
    train_generator.start()

    if args.val_data is None:
        val_queue = None
    else:
        val_queue = mp.Queue(maxsize=args.queue_maxsize)
        try:
            val_generator = BatchGenerator(1,
                                           val_data_path_list,
                                           val_labels_path_list,
                                           val_queue,
                                           train=False,
                                           out_height=608,
                                           out_width=968)
            val_generator.start()
        except Exception:
            train_generator.terminate()
            train_queue.close()
            val_queue.close()
            raise

    try:
        train(loss_func, optim, train_queue, args.max_iter, args.mean_interval,
              args.save_interval, val_queue, lr_decay_iter_dict, args.gpu_id,
              args.ignore_labels, args.outdir)
    except BaseException:
        train_generator.terminate()
        train_queue.close()
        if val_queue is not None:
            val_generator.terminate()
            val_queue.close()
        raise

    train_generator.terminate()
    train_queue.close()
    if val_queue is not None:
        val_generator.terminate()
        val_queue.close()
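Note that the WeightDecay hook above is only added for the non-Adam optimizers; Adam is constructed with weight_decay_rate, so the decay is applied inside its update rule rather than added to the gradients a second time. A condensed sketch of that branching, under the same assumption:

import chainer.optimizers as optims
from chainer.optimizer_hooks import WeightDecay

def build_optimizer(model, name='nesterov', lr=0.1, momentum=0.9,
                    weight_decay=1e-4):
    # hypothetical helper mirroring the branching in the example above
    if name == 'adam':
        optim = optims.Adam(alpha=lr, beta1=momentum,
                            weight_decay_rate=weight_decay, amsgrad=True)
        optim.setup(model)
    else:
        optim = optims.NesterovAG(lr=lr, momentum=momentum)
        optim.setup(model)
        if weight_decay > 0:
            # classic L2 penalty added to the gradients
            optim.add_hook(WeightDecay(weight_decay))
    return optim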
Example no. 25
def run(input_dir, test_dir, output, batch_size,
        iterator='SerialIterator',
        device=-1, pretrained_model='', save_trigger=10000,
        test_trigger=1000,
        parser_module='XMLParser',
        train_module='MultiboxTrainChain',
        model_module='chainercv.links.SSD300'):
    pretrained_model = join(PROJECT_DIR, pretrained_model)
    if pretrained_model and os.path.isfile(pretrained_model):
        print('Pretrained model {} loaded.'.format(pretrained_model))
    else:
        print('Pretrained model file not found, ' +
              'using imagenet as default.')
        pretrained_model = 'imagenet'
    parser = _import_module('multibuildingdetector.parsers.{}', parser_module)

    model = _import_class(model_module)(n_fg_class=len(parser.LABEL_NAMES),
                                        pretrained_model=pretrained_model)
    model.use_preset('evaluate')
    train_chain = _import_class('multibuildingdetector.trainchains.{}'
                                .format(train_module))(model)
    if device >= 0:
        chainer.cuda.get_device_from_id(device).use()
        model.to_gpu()

    train, test = load_train_test_set(input_dir, test_dir, parser)

    augmented_train = TransformDataset(
        train,
        ImageAugmentation(model.coder, model.insize, model.mean))
    train_iter = getattr(chainer.iterators, iterator)(augmented_train,
                                                      batch_size)

    optimizer = chainer.optimizers.Adam()
    optimizer.setup(train_chain)

    for param in train_chain.params():
        if param.name == 'b':
            param.update_rule.add_hook(GradientScaling(2))
        else:
            param.update_rule.add_hook(WeightDecay(0.0005))

    if device is None:
        updater = chainer.training.StandardUpdater(train_iter, optimizer)
    else:
        updater = chainer.training.StandardUpdater(train_iter, optimizer,
                                                   device=device)
    trainer = chainer.training.Trainer(updater, (120000, 'iteration'), output)
    log_fields = ['main/' + x for x in train_chain.loss_labels]
    if train_module == 'MultiboxTrainChain':
        test_iter = chainer.iterators.SerialIterator(
            test,
            batch_size, repeat=False, shuffle=False)
        trainer.extend(
            DetectionVOCEvaluator(
                test_iter, model, use_07_metric=True,
                label_names=parser.LABEL_NAMES),
            trigger=(test_trigger, 'iteration'))
        log_fields.append('validation/main/map')
    else:
        triplet_test = TransformDataset(
            test,
            ImageAugmentation(model.coder, model.insize, model.mean,
                              augment=False))
        test_iter = chainer.iterators.SerialIterator(
            triplet_test,
            batch_size, repeat=False, shuffle=False)
        trainer.extend(
            TripletEvaluator(
                test_iter, model,
                label_names=parser.LABEL_NAMES,
                save_plt=True,
                save_path=output),
            trigger=(test_trigger, 'iteration'))

    log_interval = 10, 'iteration'
    trainer.extend(extensions.LogReport(trigger=log_interval))
    trainer.extend(extensions.observe_lr(), trigger=log_interval)
    trainer.extend(extensions.PrintReport(
        ['epoch', 'iteration', 'lr',
         *log_fields]))
    trainer.extend(extensions.snapshot_object(
        model, 'model_iter_{.updater.iteration}'),
        trigger=(save_trigger, 'iteration'))

    trainer.extend(extensions.ProgressBar(update_interval=10))
    trainer.run()
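_import_module and _import_class are project helpers that are not shown in this example. A plausible sketch of what they could look like with importlib, inferred from the call sites above (the signatures here are assumptions):

import importlib

def _import_module(template, name):
    # e.g. _import_module('multibuildingdetector.parsers.{}', 'XMLParser')
    return importlib.import_module(template.format(name))

def _import_class(path):
    # e.g. _import_class('chainercv.links.SSD300')
    module_name, class_name = path.rsplit('.', 1)
    return getattr(importlib.import_module(module_name), class_name)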
Example no. 26
def main():
    # Parameters
    epoch_max = 200
    minibatch_max = 64
    lambda_em = 1.0
    gpu_id = 0
    save_interval = 50
    model_name = 'model'
    dataset_path = 'mnist.pklb'

    # Preparation of dataset
    # Change this logic for your dataset
    with open(dataset_path, 'rb') as f:
        mnist = pickle.Unpickler(f).load()
    ds_train = mnist['image']['train']
    ds_train = [
        np.asarray(x.reshape(-1), dtype='float32') / 255.0 for x in ds_train
    ]
    print('the number of dataset', len(ds_train))

    # Setup of our model
    model = net.DDGMNet()
    if gpu_id >= 0:
        model.to_gpu(gpu_id)
    opt = optimizers.Adam(alpha=1e-3)
    opt.setup(model)
    opt.add_hook(WeightDecay(rate=1e-6))

    # Learning loop
    for epoch_id in range(0, epoch_max):
        print('start epoch %d' % (epoch_id))

        # Evaluation
        fake, _ = model.generate(mb_len=10)
        fake = cuda.to_cpu(fake.data)
        img_array = (np.hstack([x.reshape((28, 28))
                                for x in fake]) * 255).astype('uint8')
        Image.fromarray(img_array).save('%s_test_%d.png' %
                                        (model_name, epoch_id))
        print('generated gen_test_%d.png' % (epoch_id))

        sum_e_real = 0.0
        sum_e_fake = 0.0
        sum_entropy = 0.0
        sum_trial = 0
        order_dataset = np.random.permutation(len(ds_train))
        for i in range(0, len(order_dataset), minibatch_max):
            # Energy calculation
            mb = [
                model.xp.asarray(ds_train[j])
                for j in order_dataset[i:i + minibatch_max]
            ]
            mb_len = len(mb)
            mb = chainer.variable.Variable(model.xp.vstack(mb), volatile=False)
            e_real = model.energy(mb, True)
            fake, entropy = model.generate(mb_len=mb_len)
            e_fake = model.energy(fake, True)

            # Gradient calculation with backprop
            model.zerograds()
            e_fake.backward()
            # changing the sign of gradients related to the energy model
            model.scale_em_grads(-lambda_em)
            (lambda_em * e_real - entropy).backward()

            # go
            opt.update()

            # Outputing information
            print('%d r:%f f:%f e:%f' % (i, float(
                e_real.data), float(e_fake.data), float(entropy.data)))
            sum_e_real += float(e_real.data) * mb_len
            sum_e_fake += float(e_fake.data) * mb_len
            sum_entropy += float(entropy.data) * mb_len

        if epoch_id % save_interval == 0:
            serializers.save_npz('%s_%d' % (model_name, epoch_id), model)
        avr_e_real = sum_e_real / len(ds_train)
        avr_e_fake = sum_e_fake / len(ds_train)
        avr_entropy = sum_entropy / len(ds_train)
        with open('%s_log.txt' % (model_name), 'a') as f:
            f.writelines([
                '%d\t%f\t%f\t%f\n' %
                (epoch_id, avr_e_real, avr_e_fake, avr_entropy)
            ])

    serializers.save_npz('%s_%d' % (model_name, epoch_id), model)
    print('end')
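model.scale_em_grads belongs to the unshown net.DDGMNet; judging from the comment in the loop, it rescales the accumulated gradients of the energy-model part so that a single backward pass can push the generator and the energy model in opposite directions. A hedged sketch of such a helper, assuming the energy model is a child link of the network:

def scale_grads(link, factor):
    # multiply the accumulated gradients of every parameter under `link`
    for param in link.params():
        if param.grad is not None:
            param.grad *= factor

# hypothetical usage mirroring model.scale_em_grads(-lambda_em):
# scale_grads(model.energy_model, -lambda_em)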
Example no. 27
def main():
    model_cfgs = {
        'resnet50': {
            'class': ResNet50,
            'score_layer_name': 'fc6',
            'kwargs': {
                'arch': 'fb'
            }
        },
        'resnet101': {
            'class': ResNet101,
            'score_layer_name': 'fc6',
            'kwargs': {
                'arch': 'fb'
            }
        },
        'resnet152': {
            'class': ResNet152,
            'score_layer_name': 'fc6',
            'kwargs': {
                'arch': 'fb'
            }
        }
    }
    parser = argparse.ArgumentParser(
        description='Learning convnet from ILSVRC2012 dataset')
    parser.add_argument('train', help='Path to root of the train dataset')
    parser.add_argument('val', help='Path to root of the validation dataset')
    parser.add_argument('--model',
                        '-m',
                        choices=model_cfgs.keys(),
                        default='resnet50',
                        help='Convnet models')
    parser.add_argument('--communicator',
                        type=str,
                        default='pure_nccl',
                        help='Type of communicator')
    parser.add_argument('--loaderjob', type=int, default=4)
    parser.add_argument('--batchsize',
                        type=int,
                        default=32,
                        help='Batch size for each worker')
    parser.add_argument('--lr', type=float)
    parser.add_argument('--momentum', type=float, default=0.9)
    parser.add_argument('--weight_decay', type=float, default=0.0001)
    parser.add_argument('--out', type=str, default='result')
    parser.add_argument('--epoch', type=int, default=90)
    args = parser.parse_args()

    # https://docs.chainer.org/en/stable/chainermn/tutorial/tips_faqs.html#using-multiprocessiterator
    if hasattr(multiprocessing, 'set_start_method'):
        multiprocessing.set_start_method('forkserver')
        p = multiprocessing.Process()
        p.start()
        p.join()

    comm = chainermn.create_communicator(args.communicator)
    device = comm.intra_rank

    if args.lr is not None:
        lr = args.lr
    else:
        lr = 0.1 * (args.batchsize * comm.size) / 256
        if comm.rank == 0:
            print('lr={}: lr is selected based on the linear '
                  'scaling rule'.format(lr))

    label_names = directory_parsing_label_names(args.train)

    model_cfg = model_cfgs[args.model]
    extractor = model_cfg['class'](n_class=len(label_names),
                                   **model_cfg['kwargs'])
    extractor.pick = model_cfg['score_layer_name']
    model = Classifier(extractor)
    # Following https://arxiv.org/pdf/1706.02677.pdf,
    # the gamma of the last BN of each resblock is initialized by zeros.
    for l in model.links():
        if isinstance(l, Bottleneck):
            l.conv3.bn.gamma.data[:] = 0

    train_data = DirectoryParsingLabelDataset(args.train)
    val_data = DirectoryParsingLabelDataset(args.val)
    train_data = TransformDataset(train_data, ('img', 'label'),
                                  TrainTransform(extractor.mean))
    val_data = TransformDataset(val_data, ('img', 'label'),
                                ValTransform(extractor.mean))
    print('finished loading dataset')

    if comm.rank == 0:
        train_indices = np.arange(len(train_data))
        val_indices = np.arange(len(val_data))
    else:
        train_indices = None
        val_indices = None

    train_indices = chainermn.scatter_dataset(train_indices,
                                              comm,
                                              shuffle=True)
    val_indices = chainermn.scatter_dataset(val_indices, comm, shuffle=True)
    train_data = train_data.slice[train_indices]
    val_data = val_data.slice[val_indices]
    train_iter = chainer.iterators.MultiprocessIterator(
        train_data, args.batchsize, n_processes=args.loaderjob)
    val_iter = iterators.MultiprocessIterator(val_data,
                                              args.batchsize,
                                              repeat=False,
                                              shuffle=False,
                                              n_processes=args.loaderjob)

    optimizer = chainermn.create_multi_node_optimizer(
        CorrectedMomentumSGD(lr=lr, momentum=args.momentum), comm)
    optimizer.setup(model)
    for param in model.params():
        if param.name not in ('beta', 'gamma'):
            param.update_rule.add_hook(WeightDecay(args.weight_decay))

    if device >= 0:
        chainer.cuda.get_device(device).use()
        model.to_gpu()

    updater = chainer.training.StandardUpdater(train_iter,
                                               optimizer,
                                               device=device)

    trainer = training.Trainer(updater, (args.epoch, 'epoch'), out=args.out)

    @make_shift('lr')
    def warmup_and_exponential_shift(trainer):
        epoch = trainer.updater.epoch_detail
        warmup_epoch = 5
        if epoch < warmup_epoch:
            if lr > 0.1:
                warmup_rate = 0.1 / lr
                rate = warmup_rate \
                    + (1 - warmup_rate) * epoch / warmup_epoch
            else:
                rate = 1
        elif epoch < 30:
            rate = 1
        elif epoch < 60:
            rate = 0.1
        elif epoch < 80:
            rate = 0.01
        else:
            rate = 0.001
        return rate * lr

    trainer.extend(warmup_and_exponential_shift)
    evaluator = chainermn.create_multi_node_evaluator(
        extensions.Evaluator(val_iter, model, device=device), comm)
    trainer.extend(evaluator, trigger=(1, 'epoch'))

    log_interval = 0.1, 'epoch'
    print_interval = 0.1, 'epoch'

    if comm.rank == 0:
        trainer.extend(chainer.training.extensions.observe_lr(),
                       trigger=log_interval)
        trainer.extend(extensions.snapshot_object(
            extractor, 'snapshot_model_{.updater.epoch}.npz'),
                       trigger=(args.epoch, 'epoch'))
        trainer.extend(extensions.LogReport(trigger=log_interval))
        trainer.extend(extensions.PrintReport([
            'iteration', 'epoch', 'elapsed_time', 'lr', 'main/loss',
            'validation/main/loss', 'main/accuracy', 'validation/main/accuracy'
        ]),
                       trigger=print_interval)
        trainer.extend(extensions.ProgressBar(update_interval=10))

    trainer.run()
Example no. 28
def main(args):
    if not os.path.exists(args.dir_model):
        os.mkdir(args.dir_model)
    log_file_name = os.path.join(args.dir_model, 'log.txt')

    args.store()

    dataset_train = get_dataset(args.dataset_train_path)

    print('The number of datasets:')
    print('dataset_train: %d' % (len(dataset_train)))

    # ToDo: loading a model from dir_base_model

    if args.gpuid >= 0:
        cuda.get_device(args.gpuid).use()

    target = NeuralModel()

    if args.gpuid >= 0:
        target.to_gpu(args.gpuid)
    target_opt = chainer.optimizers.SGD(lr=args.train_sgd_lr)
    target_opt.setup(target)
    target_opt.add_hook(WeightDecay(rate=args.train_weight_decay_rate))
    target_opt.add_hook(GradientClipping(args.train_gradient_clipping_norm))

    def save_target(save_path):
        target.to_cpu()
        serializers.save_npz(save_path, target)
        if args.gpuid >= 0:
            target.to_gpu(args.gpuid)

    def save_charvec(save_path):
        target.to_cpu()

        charvec_dict = {}
        for v, k in dataset_train:
            mu, ln_var = target.encode(v[None, :, :])
            charvec_dict[k] = mu.data[0]
        with open(save_path, 'wb') as f:
            pickle.Pickler(f).dump(charvec_dict)

        if not (args.decode_samples is None):
            for i in range(len(args.decode_samples)):
                c = args.decode_samples[i]
                array = (255 * target.decode(
                    charvec_dict[c][None, :]).data[0]).astype('uint8')
                img_size = array.shape[0]
                array = np.broadcast_to(array[:, :, None],
                                        (img_size, img_size, 3))
                Image.fromarray(array).save(save_path + str(i) + '.bmp')

        if args.gpuid >= 0:
            target.to_gpu(args.gpuid)

    print('start training')
    for ep_id in range(args.train_max_epoch):
        np.random.shuffle(dataset_train)

        C = args.kld_function[ep_id]
        epoch_loss = 0
        epoch_rec_loss = 0
        epoch_kld_loss = 0
        for mb_id in range(0, len(dataset_train), args.minibatch_size_train):
            mb = dataset_train[mb_id:mb_id + args.minibatch_size_train]
            mb = [v for v, k in mb]
            x = target.xp.asarray(np.stack(mb))
            loss, rec_loss, kld_loss = target(x, C=C)
            target.zerograds()
            loss.backward()
            target_opt.update()
            loss.unchain_backward()
            epoch_loss += float(loss.data) * len(mb)
            epoch_rec_loss += rec_loss * len(mb)
            epoch_kld_loss += kld_loss * len(mb)
        epoch_loss /= len(dataset_train)
        epoch_rec_loss /= len(dataset_train)
        epoch_kld_loss /= len(dataset_train)
        record = (ep_id, C, epoch_loss, epoch_rec_loss, epoch_kld_loss)
        print('ep=%d C=%.4f loss=%.4f rec_loss=%.4f kld_loss=%.4f' % record)
        with open(log_file_name, 'a') as f:
            f.write('ep=%d C=%.4f loss=%.4f rec_loss=%.4f kld_loss=%.4f\n' %
                    record)

        # MODEL STORING
        if ep_id % args.save_each == 0:
            save_target(
                os.path.join(args.dir_model, 'trained_%d.model' % (ep_id)))
            save_charvec(
                os.path.join(args.dir_model, 'charvec_%d.pklb' % (ep_id)))
            print('model saved.')

    print('training done.')

    save_target(os.path.join(args.dir_model, 'trained_end.model'))
    save_charvec(os.path.join(args.dir_model, 'charvec_end.pklb'))
    print('the last model saved.')
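args.kld_function is indexed by epoch, so it is presumably a per-epoch schedule for the KL weight C (capacity/annealing in the beta-VAE style). A hypothetical linear warm-up that would fit this interface; the 100-epoch ramp is an assumption, not taken from the source:

train_max_epoch = 200
# C grows linearly from 0 to 1 over the first 100 epochs, then stays at 1;
# consumed as args.kld_function[ep_id] in the loop above
kld_function = [min(1.0, ep / 100.0) for ep in range(train_max_epoch)]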
                  0.36202242, 0.05815253, 0.24430117, 0.75336765, 0.53273125,
                  0.90717045, 0.26057194, 0.17050579, 0.26934674, 0.69990416
              ],
              [
                  0.77141326, 0.23113243, 0.02778885, 0.35061881, 0.50881063,
                  0.2445026, 0.87910554, 0.58546545, 0.59878369, 0.03310525
              ],
              [
                  0.862771, 0.54340754, 0.79409784, 0.94202909, 0.12964679,
                  0.34659084, 0.18709705, 0.32934376, 0.69122394, 0.65063928
              ],
              [
                  0.60503851, 0.44435167, 0.96214351, 0.28996983, 0.99434833,
                  0.89644887, 0.11432005, 0.65593003, 0.15861048, 0.51386829
              ]]
    gold = Variable(xp.array([0, 0, 1, 1, 1], dtype=xp.int32))

    discriminator = discriminator(10, dropout=0.0)
    optimizer = optimizers.AdaGrad()
    optimizer.use_cleargrads()
    optimizer.setup(discriminator)
    optimizer.add_hook(WeightDecay(0.0001))

    for i in range(1, args.epoch + 1):
        system_output = discriminator(h)
        loss = F.softmax_cross_entropy(system_output, gold)
        discriminator.cleargrads()
        loss.backward()
        optimizer.update()
        print(loss, F.argmax(system_output, axis=1))
Example no. 30
def set_optimizer(model):
    optimizer = optimizers.AdaGrad()
    optimizer.use_cleargrads()
    optimizer.setup(model)
    optimizer.add_hook(WeightDecay(0.0001))
    return optimizer
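A minimal usage sketch of the helper above; the linear link is only a stand-in model:

import chainer.links as L

model = L.Linear(10, 2)
optimizer = set_optimizer(model)  # AdaGrad with a WeightDecay(0.0001) hook attached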