Example #1
def run_train():
    datainfo, vocabs = set_up_data()
    train_sampler = RandomSampler()
    criterion = SummLoss(config=config, padding_idx=vocabs.to_index(PAD_TOKEN))
    model = CGSum(config, vocab=vocabs)
    model.to(device)

    initial_lr = config.lr
    logger.info(f"learning rate = {initial_lr}")
    optimizer = Adagrad(filter(lambda p: p.requires_grad, model.parameters()),
                        lr=initial_lr,
                        initial_accumulator_value=config.adagrad_init_acc)

    train_loader = datainfo.datasets["train"]
    valid_loader = datainfo.datasets["dev"]

    callbacks = [
        TrainCallback(config, patience=10),
        FitlogCallback(),
        LRDecayCallback(optimizer.param_groups, steps=args.weight_decay_step)
    ]
    trainer = Trainer(model=model,
                      train_data=train_loader,
                      optimizer=optimizer,
                      loss=criterion,
                      batch_size=config.batch_size,
                      check_code_level=-1,
                      sampler=train_sampler,
                      n_epochs=config.n_epochs,
                      print_every=100,
                      dev_data=valid_loader,
                      update_every=args.update_every,
                      metrics=FastRougeMetric(
                          pred='prediction',
                          art_oovs='article_oovs',
                          abstract_sentences='abstract_sentences',
                          config=config,
                          vocab=datainfo.vocabs["vocab"]),
                      metric_key="rouge-l-f",
                      validate_every=args.validate_every * args.update_every,
                      save_path=None,
                      callbacks=callbacks,
                      use_tqdm=True)

    logger.info("-" * 5 + "start training" + "-" * 5)
    traininfo = trainer.train(load_best_model=True)

    logger.info('end of training | time: {:5.2f}s'.format(
        traininfo["seconds"]))
    logger.info('best eval model in epoch %d and iter %d',
                traininfo["best_epoch"], traininfo["best_step"])
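Note: FitlogCallback() is constructed above with no arguments, so it only logs the Trainer's periodic dev evaluations; fitlog itself must be initialized before training starts. A minimal setup sketch, assuming the same fitlog API that Example #4 below uses:

import fitlog

fitlog.set_log_dir('logs/')          # directory where fitlog records this run
fitlog.add_hyper_in_file(__file__)   # log hyper-parameters annotated in this file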
Example #2
if args.optim == 'adam':
    optimizer = optim.AdamW(param_, lr=args.lr, weight_decay=args.weight_decay)
elif args.optim == 'sgd':
    # optimizer = optim.SGD(model.parameters(),lr=args.lr,momentum=args.momentum,
    #                       weight_decay=args.weight_decay)
    optimizer = optim.SGD(param_,
                          lr=args.lr,
                          momentum=args.momentum,
                          weight_decay=args.weight_decay)
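Note that param_ is not defined in this excerpt; it presumably holds the (possibly grouped) model parameters collected earlier in the script, analogous to model.parameters() in the commented-out variant.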

if args.dataset == 'msra':
    # MSRA NER ships without a dev split, so the test set doubles as dev data
    datasets['dev'] = datasets['test']
fitlog_evaluate_dataset = {'test': datasets['test']}
if args.test_train:
    fitlog_evaluate_dataset['train'] = datasets['train']
evaluate_callback = FitlogCallback(fitlog_evaluate_dataset, verbose=1)
lrschedule_callback = LRScheduler(
    lr_scheduler=LambdaLR(optimizer, lambda ep: 1 / (1 + 0.05 * ep)))
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
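The LambdaLR schedule above multiplies the base learning rate by 1 / (1 + 0.05 * ep) at epoch ep (inverse-time decay), and GradientClipCallback clips every gradient value into [-5, 5].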


class Unfreeze_Callback(Callback):
    def __init__(self, bert_embedding, fix_epoch_num):
        super().__init__()
        self.bert_embedding = bert_embedding
        self.fix_epoch_num = fix_epoch_num
        assert not self.bert_embedding.requires_grad

    def on_epoch_begin(self):
        # after `fix_epoch_num` frozen epochs, start fine-tuning BERT as well
        if self.epoch == self.fix_epoch_num + 1:
            self.bert_embedding.requires_grad = True
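A hypothetical usage sketch (the frozen embedding and the two-epoch threshold are assumptions, not taken from the snippet):

# keep the BERT embedding frozen for the first 2 epochs, then fine-tune it
bert_embedding.requires_grad = False
callbacks = [evaluate_callback, lrschedule_callback, clip_callback,
             Unfreeze_Callback(bert_embedding, fix_epoch_num=2)]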
Example #3
pad_id = data_bundle.pad_id

model = ENBertReverseDict(pre_name, word2bpes, pad_id=pad_id,
                          number_word_in_train=data_bundle.number_word_in_train)

if torch.cuda.is_available():
    model.cuda()

optimizer = optim.AdamW(model.parameters(), lr=lr)

data = {}
for name in ['seen', 'unseen', 'desc']:
    data[name] = data_bundle.get_dataset(name)

callbacks = [GradientClipCallback(clip_type='value', clip_value=5),
             WarmupCallback(warmup=0.01, schedule='linear')]
callbacks.append(FitlogCallback(data=data, verbose=1))
train_data = data_bundle.get_dataset('train')
train_data.add_seq_len('input')

# from collections import Counter
# print(Counter(train_data.get_field('seq_len').content))
# exit(0)

sampler = BucketSampler()
clip_max_length(train_data, data_bundle)

trainer = Trainer(train_data=train_data, model=model,
                  optimizer=optimizer, loss=CrossEntropyLoss(),
                  batch_size=batch_size, sampler=sampler, drop_last=False,
                  update_every=1, num_workers=1, n_epochs=n_epochs,
                  print_every=5, dev_data=data_bundle.get_dataset('dev'),
                  metrics=MonoMetric(),
                  # the snippet is cut off here; the closing arguments below
                  # are assumed from the surrounding context
                  callbacks=callbacks, use_tqdm=True)
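BucketSampler groups samples of similar length into the same batch to minimize padding; by default it reads the 'seq_len' field, which is why train_data.add_seq_len('input') is called above.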
Example #4
def main():
    args = parse_args()

    if args.debug:
        fitlog.debug()

    fitlog.set_log_dir(args.log_dir)
    fitlog.commit(__file__)
    fitlog.add_hyper_in_file(__file__)
    fitlog.add_hyper(args)
    if args.gpu != 'all':
        os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu

    train_set, dev_set, test_set, temp_ent_vocab = load_fewrel_graph_data(
        data_dir=args.data_dir)

    print('data directory: {}'.format(args.data_dir))
    print('# of train samples: {}'.format(len(train_set)))
    print('# of dev samples: {}'.format(len(dev_set)))
    print('# of test samples: {}'.format(len(test_set)))

    ent_vocab, rel_vocab = load_ent_rel_vocabs(path='../')

    # load entity embeddings
    # map the entities used by this task into indices of the full entity vocab
    ent_index = []
    for k in temp_ent_vocab:
        ent_index.append(ent_vocab[k])
    ent_index = torch.tensor(ent_index)
    ent_emb = np.load(os.path.join(args.model_path, 'entities.npy'))
    ent_embedding = nn.Embedding.from_pretrained(torch.from_numpy(ent_emb))
    ent_emb = ent_embedding(ent_index.view(1, -1)).squeeze().detach()

    # load CoLAKE parameters
    config = RobertaConfig.from_pretrained('roberta-base', type_vocab_size=3)
    model = CoLAKEForRE(config,
                        num_types=len(train_set.label_vocab),
                        ent_emb=ent_emb)
    states_dict = torch.load(os.path.join(args.model_path, 'model.bin'))
    model.load_state_dict(states_dict, strict=False)
    print('parameters below are randomly initialized:')
    for name, param in model.named_parameters():
        if name not in states_dict:
            print(name)

    # tie relation classification head
    rel_index = []
    for k in train_set.label_vocab:
        rel_index.append(rel_vocab[k])
    rel_index = torch.LongTensor(rel_index)
    rel_embeddings = nn.Embedding.from_pretrained(
        states_dict['rel_embeddings.weight'])
    rel_index = rel_index.cuda()
    rel_cls_weight = rel_embeddings(rel_index.view(1, -1)).squeeze()
    model.tie_rel_weights(rel_cls_weight)

    model.rel_head.dense.weight.data = states_dict['rel_lm_head.dense.weight']
    model.rel_head.dense.bias.data = states_dict['rel_lm_head.dense.bias']
    model.rel_head.layer_norm.weight.data = states_dict[
        'rel_lm_head.layer_norm.weight']
    model.rel_head.layer_norm.bias.data = states_dict[
        'rel_lm_head.layer_norm.bias']

    model.resize_token_embeddings(
        len(RobertaTokenizer.from_pretrained('roberta-base')) + 4)
    print('parameters of CoLAKE have been loaded.')

    # fine-tune
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight', 'embedding']
    param_optimizer = list(model.named_parameters())
    optimizer_grouped_parameters = [{
        'params': [p for n, p in param_optimizer
                   if not any(nd in n for nd in no_decay)],
        'weight_decay': args.weight_decay
    }, {
        'params': [p for n, p in param_optimizer
                   if any(nd in n for nd in no_decay)],
        'weight_decay': 0.0
    }]
    optimizer = optim.AdamW(optimizer_grouped_parameters,
                            lr=args.lr,
                            betas=(0.9, args.beta),
                            eps=1e-6)

    metrics = [MacroMetric(pred='pred', target='target')]

    test_data_iter = TorchLoaderIter(dataset=test_set,
                                     batch_size=args.batch_size,
                                     sampler=RandomSampler(),
                                     num_workers=4,
                                     collate_fn=test_set.collate_fn)
    devices = list(range(torch.cuda.device_count()))
    tester = Tester(data=test_data_iter,
                    model=model,
                    metrics=metrics,
                    device=devices)
    # tester.test()

    fitlog_callback = FitlogCallback(tester=tester,
                                     log_loss_every=100,
                                     verbose=1)
    gradient_clip_callback = GradientClipCallback(clip_value=1,
                                                  clip_type='norm')
    warmup_callback = WarmupCallback(warmup=args.warm_up, schedule='linear')

    bsz = args.batch_size // args.grad_accumulation

    train_data_iter = TorchLoaderIter(dataset=train_set,
                                      batch_size=bsz,
                                      sampler=RandomSampler(),
                                      num_workers=4,
                                      collate_fn=train_set.collate_fn)
    dev_data_iter = TorchLoaderIter(dataset=dev_set,
                                    batch_size=bsz,
                                    sampler=RandomSampler(),
                                    num_workers=4,
                                    collate_fn=dev_set.collate_fn)

    trainer = Trainer(
        train_data=train_data_iter,
        dev_data=dev_data_iter,
        model=model,
        optimizer=optimizer,
        loss=LossInForward(),
        batch_size=bsz,
        update_every=args.grad_accumulation,
        n_epochs=args.epoch,
        metrics=metrics,
        callbacks=[fitlog_callback, gradient_clip_callback, warmup_callback],
        device=devices,
        use_tqdm=True)

    trainer.train(load_best_model=False)
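Note that bsz = args.batch_size // args.grad_accumulation together with update_every=args.grad_accumulation keeps the effective batch size at args.batch_size: the Trainer accumulates gradients over grad_accumulation mini-batches before each optimizer step.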
Example #5
# the top of this snippet is cut off; below is a standard reconstruction of
# the missing first parameter group (the exact no_decay list is an assumption)
no_decay = ["bias", "LayerNorm.weight"]
optimizer_grouped_parameters = [
    {
        "params": [
            p for n, p in model.named_parameters()
            if not any(nd in n for nd in no_decay)
        ],
        "weight_decay": args.weight_decay,
    },
    {
        "params": [
            p for n, p in model.named_parameters()
            if any(nd in n for nd in no_decay)
        ],
        "weight_decay": 0.0,
    },
]
optimizer = optim.AdamW(optimizer_grouped_parameters, lr=args.lr)

callbacks = []
callbacks.append(WarmupCallback(0.01, "linear"))
callbacks.append(FitlogCallback(
    # data_bundle.get_dataset('train')
))

import torch
import torch.nn.functional as F
from fastNLP import LossBase


class SmoothLoss(LossBase):
    def __init__(self, smooth_eps=0):
        super().__init__()
        self.smooth_eps = smooth_eps

    def get_loss(self, pred, target):
        """Cross entropy with optional label smoothing (the snippet is
        truncated here; this body is a standard reconstruction)."""
        if self.smooth_eps <= 0:
            return F.cross_entropy(pred, target)
        n_class = pred.size(-1)
        # mix the one-hot target with a uniform distribution over classes
        one_hot = F.one_hot(target, n_class).float()
        soft_target = one_hot * (1 - self.smooth_eps) + self.smooth_eps / n_class
        log_prob = F.log_softmax(pred, dim=-1)
        return -(soft_target * log_prob).sum(dim=-1).mean()