def getdata_matching(task, type, batch=4):
    Task, vocab, ind, index, Devset, Testset = preprocess(task, type)
    Train = []
    global Test
    j = 0
    for i in range(len(Task)):
        vocab.index_dataset(Task[i], field_name='words', new_field_name='words')
        if i in ind:
            list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
            slice = random.sample(list, 4)
            another = [x for x in list if x not in slice]
            train = Task[i][another]
            support = Task[i][slice]
        else:
            length = len(Task[i])
            list = [x - 1 for x in range(length)]
            slice = random.sample(list, 20)
            another = [x for x in list if x not in slice]
            train, support = Task[i][another], Task[i][slice]
        if i == index:
            Test = Pair(Testset, support)
            Dev = Pair(Devset, support)
        Train.append(Pair(train, support))

    for i in range(len(Train)):
        Train[i].batch.set_input('words')
        Train[i].support.set_input('words')
        Train[i].batch.set_target('onehot')
        Train[i].support.set_target('onehot')
        Train[i].batch.apply(lambda x: len(x['words']), new_field_name='seq_len')
        Train[i].support.apply(lambda x: len(x['words']), new_field_name='seq_len')

    Test.batch.set_input('words')
    Test.support.set_input('words')
    Test.batch.set_target('onehot')
    Test.support.set_target('onehot')
    Test.batch.apply(lambda x: len(x['words']), new_field_name='seq_len')
    Test.support.apply(lambda x: len(x['words']), new_field_name='seq_len')

    Dev.batch.set_input('words')
    Dev.support.set_input('words')
    Dev.batch.set_target('onehot')
    Dev.support.set_target('onehot')
    Dev.batch.apply(lambda x: len(x['words']), new_field_name='seq_len')
    Dev.support.apply(lambda x: len(x['words']), new_field_name='seq_len')

    Train_batch = []
    for i in range(len(Train)):
        if i in ind:
            sampler = BucketSampler(num_buckets=1, batch_size=batch, seq_len_field_name='seq_len')
            Train_batch.append(Pair(Batch(batch_size=batch, dataset=Train[i].batch, sampler=sampler), Train[i].support))
        else:
            sampler = BucketSampler(batch_size=batch, seq_len_field_name='seq_len')
            Train_batch.append(Pair(Batch(batch_size=batch, dataset=Train[i].batch, sampler=sampler), Train[i].support))

    sampler = BucketSampler(batch_size=batch, seq_len_field_name='seq_len')
    Test_batch = Pair(Batch(batch_size=batch, dataset=Test.batch, sampler=sampler), Test.support)
    Dev_batch = Pair(Batch(batch_size=batch, dataset=Dev.batch, sampler=sampler), Dev.support)
    return Train_batch, Dev_batch, Test_batch, len(vocab)
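The snippets on this page are shown without their imports or the small helper types they rely on. A minimal, hedged sketch of what the example above assumes: the fastNLP names are real top-level exports, the Batch import path matches older fastNLP releases (newer ones expose DataSetIter instead), and Pair/Triple are hypothetical stand-ins inferred only from how they are used here.

import random
from collections import namedtuple

from fastNLP import DataSet, Instance, Vocabulary, BucketSampler
from fastNLP.core.batch import Batch  # assumption: older fastNLP release; later versions use DataSetIter

# Hypothetical helpers: the examples only need attribute access on these fields.
Pair = namedtuple('Pair', ['batch', 'support'])                    # used by getdata_matching
Triple = namedtuple('Triple', ['batch', 'support0', 'support1'])   # used by getdata_proto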
Example #2
def cnn_train(epoch, data, model, batch_size=32):
    device = torch.device("cuda")
    optim = torch.optim.Adam(model.parameters(), lr=0.001)
    lossfunc = nn.CrossEntropyLoss()

    train_sampler = BucketSampler(batch_size=batch_size,
                                  seq_len_field_name='seq_len')
    train_batch = Batch(batch_size=batch_size,
                        dataset=data,
                        sampler=train_sampler)

    for i in range(epoch):
        loss_list = []
        cnt = 0
        for batch_x, batch_y in train_batch:
            optim.zero_grad()
            batch_x['words'] = batch_x['words'].long().contiguous().to(device)
            batch_y['target'] = batch_y['target'].long().contiguous().to(
                device)
            output = model(batch_x['words'])
            loss = lossfunc(output['pred'], batch_y['target'])
            loss.backward()
            optim.step()
            loss_list.append(loss.item())
            info_str = '[info] Epoch {:d} Iteration {:d} Loss : {:.2f}'.format(
                i, cnt, loss_list[-1])
            print(info_str)
            with open('./cnn_rec.txt', 'a') as fp:
                fp.write(info_str)
                fp.write('\n')
            cnt += 1
        loss_list.clear()
        torch.save(model.state_dict(), './cnn_state.pth')
Example #3
def cnn_train(epoch, data, model, batch_size=20):
    device = torch.device("cuda")
    optim = torch.optim.Adam(model.parameters(), lr=0.002)
    lossfunc = nn.CrossEntropyLoss()

    train_sampler = BucketSampler(batch_size=batch_size,
                                  seq_len_field_name='seq_len')
    train_batch = Batch(batch_size=batch_size,
                        dataset=data,
                        sampler=train_sampler)

    for i in range(epoch):
        loss_list = []
        cnt = 0
        for batch_x, batch_y in train_batch:
            batch_x['words'] = batch_x['words'].long().contiguous().to(device)
            batch_y['target'] = batch_y['target'].long().contiguous().to(
                device)

            optim.zero_grad()
            output = model(batch_x['words'])
            loss = lossfunc(output['pred'], batch_y['target'])
            loss.backward()
            optim.step()
            loss_list.append(loss.item())

            print('[info] Epoch %d Iteration %d Loss : %f' %
                  (i, cnt, loss_list[-1]))
            cnt += 1

        loss_list.clear()
    torch.save(model.state_dict(), './cnn_state.pth')
Example #4
File: TENER.py  Project: MANASLU8/TENER
    def _get_trainer(self, models_folder):
        optimizer = optim.SGD(self.parameters(),
                              lr=self.config['lr'],
                              momentum=0.9)

        callbacks = []
        clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
        evaluate_callback = EvaluateCallback(
            self.data_bundle.get_dataset('test'))

        if self.config['warmup_steps'] > 0:
            warmup_callback = WarmupCallback(self.config['warmup_steps'],
                                             schedule='linear')
            callbacks.append(warmup_callback)
        callbacks.extend([clip_callback, evaluate_callback])

        return Trainer(self.data_bundle.get_dataset('train'),
                       self,
                       optimizer,
                       batch_size=self.config['batch_size'],
                       sampler=BucketSampler(),
                       num_workers=2,
                       n_epochs=100,
                       dev_data=self.data_bundle.get_dataset('dev'),
                       metrics=SpanFPreRecMetric(
                           tag_vocab=self.data_bundle.get_vocab('target'),
                           encoding_type=self.config['encoding_type']),
                       dev_batch_size=self.config['batch_size'] * 5,
                       callbacks=callbacks,
                       device=self.config['device'],
                       test_use_tqdm=False,
                       use_tqdm=True,
                       print_every=300,
                       save_path=models_folder)
Example #5
 def test_BucketSampler(self):
     sampler = BucketSampler(num_buckets=3,
                             batch_size=16,
                             seq_len_field_name="seq_len")
     data_set = DataSet({
         "x": [[0] * random.randint(1, 10)] * 10,
         "y": [[5, 6]] * 10
     })
     data_set.apply(lambda ins: len(ins["x"]), new_field_name="seq_len")
     indices = sampler(data_set)
     self.assertEqual(len(indices), 10)
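Building on the test above, a small hedged sketch of how the indices from a BucketSampler are normally consumed: the sampler is handed to the batch iterator so that instances with similar seq_len are padded together (the Batch import path is an assumption about an older fastNLP release; newer ones use DataSetIter).

import random
from fastNLP import DataSet, BucketSampler
from fastNLP.core.batch import Batch  # assumption: older fastNLP; newer releases use DataSetIter

ds = DataSet({"x": [[0] * random.randint(1, 10) for _ in range(10)],
              "y": [1] * 10})
ds.apply(lambda ins: len(ins["x"]), new_field_name="seq_len")
ds.set_input("x")
ds.set_target("y")

sampler = BucketSampler(num_buckets=3, batch_size=4, seq_len_field_name="seq_len")
for batch_x, batch_y in Batch(batch_size=4, dataset=ds, sampler=sampler):
    # instances of similar length end up in the same padded batch
    print(batch_x["x"].shape, batch_y["y"].shape)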
Example #6
def preprocess(batch=16):
    raw_data1 = []
    raw_data2 = []

    for i in range(len(traindata.data)):
        raw_data1.append(
            Instance(sentence=traindata.data[i],
                     label=int(traindata.target[i])))
    trainset = DataSet(raw_data1)
    trainset.apply(lambda x: pre(x['sentence']), new_field_name='words')

    for i in range(len(testdata.data)):
        raw_data2.append(
            Instance(sentence=testdata.data[i], label=int(testdata.target[i])))
    testset = DataSet(raw_data2)
    testset.apply(lambda x: pre(x['sentence']), new_field_name='words')

    global vocab
    vocab = Vocabulary(min_freq=1).from_dataset(trainset,
                                                testset,
                                                field_name='words')
    vocab.index_dataset(trainset,
                        testset,
                        field_name='words',
                        new_field_name='words')
    trainset.set_input('words')
    testset.set_input('words')

    trainset.apply(lambda x: int(x['label']),
                   new_field_name='target',
                   is_target=True)
    testset.apply(lambda x: int(x['label']),
                  new_field_name='target',
                  is_target=True)

    trainset.apply(lambda x: len(x['words']), new_field_name='seq_len')
    testset.apply(lambda x: len(x['words']), new_field_name='seq_len')

    global vocabsize
    vocabsize = len(vocab)
    sampler = BucketSampler(batch_size=batch, seq_len_field_name='seq_len')
    train_batch = Batch(batch_size=batch, dataset=trainset, sampler=sampler)
    test_batch = Batch(batch_size=batch, dataset=testset, sampler=sampler)

    return train_batch, test_batch, vocabsize
Example #7
def run_train(config):
    train_dir, model_dir = initial_dir('train', config)
    config.train_path = train_dir
    config.model_path = model_dir
    print_config(config, train_dir)
    datainfo = set_up_data('train', config)
    train_sampler = BucketSampler(batch_size=config.batch_size, seq_len_field_name='enc_len')
    criterion = MyLoss(config=config, padding_idx=datainfo.vocabs["train"].to_index(PAD_TOKEN))

    model = Model(vocab=datainfo.vocabs["train"], config=config)
    params = list(model.encoder.parameters()) + list(model.decoder.parameters()) + \
             list(model.reduce_state.parameters())
    initial_lr = config.lr_coverage if config.is_coverage else config.lr
    optimizer = Adagrad(params, lr=initial_lr, initial_accumulator_value=config.adagrad_init_acc)

    train_loader = datainfo.datasets["train"]
    valid_loader = datainfo.datasets["dev"]
    summary_writer = tf.compat.v1.summary.FileWriter(train_dir)
    trainer = Trainer(model=model, train_data=train_loader, optimizer=optimizer, loss=criterion,
                      batch_size=config.batch_size, check_code_level=-1,
                      n_epochs=config.n_epochs, print_every=100, dev_data=valid_loader,
                      metrics=FastRougeMetric(pred='prediction', art_oovs='article_oovs',
                                              abstract_sentences='abstract_sentences', config=config,
                                              vocab=datainfo.vocabs["train"]),
                      metric_key="rouge-l-f", validate_every=-1, save_path=model_dir,
                      callbacks=[TrainCallback(config, summary_writer, patience=10)], use_tqdm=False,
                      device=config.visible_gpu)

    logger.info("-" * 5 + "start training" + "-" * 5)

    traininfo = trainer.train(load_best_model=True)
    logger.info('   | end of Train | time: {:5.2f}s | '.format(traininfo["seconds"]))
    logger.info('[INFO] best eval model in epoch %d and iter %d', traininfo["best_epoch"], traininfo["best_step"])
    logger.info(traininfo["best_eval"])

    bestmodel_save_path = os.path.join(config.model_path,
                                       'bestmodel.pkl')  # this is where checkpoints of best models are saved
    state = {
        'encoder_state_dict': model.encoder.state_dict(),
        'decoder_state_dict': model.decoder.state_dict(),
        'reduce_state_dict': model.reduce_state.state_dict()
    }
    torch.save(state, bestmodel_save_path)
    # Wasn't the model only passed into Trainer as an argument? Why do changes to the model inside propagate out here?
    logger.info('[INFO] Saving eval best model to %s', bestmodel_save_path)
Example #8
metrics.append(LossMetric(loss=Const.LOSS))

optimizer = Adam(model.parameters(), lr=ops.lr, weight_decay=0)
scheduler = LRScheduler(
    LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch)))
callbacks.append(scheduler)
# callbacks.append(LRScheduler(CosineAnnealingLR(optimizer, 15)))
# optimizer = SWATS(model.parameters(), verbose=True)
# optimizer = Adam(model.parameters(), lr=0.005)

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

trainer = Trainer(train_data=data.datasets['train'],
                  model=model,
                  optimizer=optimizer,
                  sampler=BucketSampler(num_buckets=50,
                                        batch_size=ops.batch_size),
                  device=device,
                  dev_data=data.datasets['dev'],
                  batch_size=ops.batch_size,
                  metrics=metrics,
                  check_code_level=-1,
                  callbacks=callbacks,
                  num_workers=2,
                  n_epochs=ops.num_epochs)
trainer.train()

torch.save(model, 'idcnn.pt')

tester = Tester(data=data.datasets['test'],
                model=model,
                metrics=metrics)
Example #9
def load_data():
    # the start of this snippet is missing; only the 'dev' path entry survives
    paths = {'dev': "NER/corpus/CoNLL-2003/eng.testa"}
    data = Conll2003NERPipe(encoding_type=encoding_type).process_from_file(paths)
    return data
data = load_data()
print(data)

char_embed = CNNCharEmbedding(vocab=data.get_vocab('words'), embed_size=30, char_emb_size=30, filter_nums=[30],
                              kernel_sizes=[3], word_dropout=0, dropout=0.5)
word_embed = StaticEmbedding(vocab=data.get_vocab('words'),
                             model_dir_or_name='en-glove-6b-100d',
                             requires_grad=True, lower=True, word_dropout=0.01, dropout=0.5)
word_embed.embedding.weight.data = word_embed.embedding.weight.data/word_embed.embedding.weight.data.std()
embed = StackEmbedding([word_embed, char_embed])

model = CNNBiLSTMCRF(embed, hidden_size=200, num_layers=1, tag_vocab=data.vocabs[Const.TARGET],
                     encoding_type=encoding_type)

callbacks = [
            GradientClipCallback(clip_type='value', clip_value=5),
            EvaluateCallback(data=data.get_dataset('test'))  # additionally evaluate performance on the test set
            ]

optimizer = SGD(model.parameters(), lr=0.008, momentum=0.9)
scheduler = LRScheduler(LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch)))
callbacks.append(scheduler)

trainer = Trainer(train_data=data.get_dataset('train'), model=model, optimizer=optimizer, sampler=BucketSampler(),
                  device=0, dev_data=data.get_dataset('dev'), batch_size=20,
                  metrics=SpanFPreRecMetric(tag_vocab=data.vocabs[Const.TARGET], encoding_type=encoding_type),
                  callbacks=callbacks, num_workers=2, n_epochs=100, dev_batch_size=512)
trainer.train()
Example #10
    CWS_dataset = DataSet()
    for key in task_list:
        if key.startswith('CWS'):
            for ins in all_data[target][key]:
                CWS_dataset.append(ins)
            del all_data[target][key]
    CWS_dataset.set_input('chars', 'target', 'seq_len', 'task_class')
    CWS_dataset.set_target('target', 'seq_len')
    all_data[target]['CWS-all'] = CWS_dataset

train_data = dict()
train_data['POS-ctb9'] = all_data['train']['POS-ctb9']
train_data['CWS-all'] = all_data['train']['CWS-all']
train_data = MultiTaskIter(all_data['train'],
                           batch_size=batch_size,
                           sampler=BucketSampler(batch_size=batch_size))

#del pos
trainer = Trainer(train_data=train_data,
                  model=model,
                  optimizer=optimizer,
                  device=device,
                  dev_data=all_data['dev']['POS-ctb9'],
                  batch_size=batch_size,
                  metrics=metric3,
                  loss=None,
                  n_epochs=n_epochs,
                  check_code_level=-1,
                  update_every=update_every,
                  test_use_tqdm=True,
                  callbacks=callbacks)
Example #11
                   app_index=char_labels_vocab['APP'],
                   pre_chars_embed=pre_chars_embed,
                   pre_bigrams_embed=pre_bigrams_embed,
                   pre_trigrams_embed=pre_trigrams_embed)

metric1 = SegAppCharParseF1Metric(char_labels_vocab['APP'])
metric2 = CWSMetric(char_labels_vocab['APP'])
metrics = [metric1, metric2]

optimizer = optim.Adam(
    [param for param in model.parameters() if param.requires_grad],
    lr=lr,
    weight_decay=weight_decay,
    betas=[0.9, 0.9])

sampler = BucketSampler(seq_len_field_name='seq_lens')
callbacks = []
# scheduler = LambdaLR(optimizer, lr_lambda=lambda step:(0.75)**(step//5000))
scheduler = StepLR(optimizer, step_size=18, gamma=0.75)
# optim_callback = OptimizerCallback(optimizer, scheduler, update_every)
# callbacks.append(optim_callback)
scheduler_callback = LRScheduler(scheduler)
callbacks.append(scheduler_callback)
callbacks.append(GradientClipCallback(clip_type='value', clip_value=5))

tester = Tester(data=data.datasets['test'],
                model=model,
                metrics=metrics,
                batch_size=64,
                device=device,
                verbose=0)
Example #12
                     hidden_size=200,
                     num_layers=1,
                     tag_vocab=data.vocabs[Const.TARGET],
                     encoding_type=encoding_type)

callbacks = [
    GradientClipCallback(clip_type='value', clip_value=5),
    FitlogCallback({'test': data.datasets['test']}, verbose=1),
    # SaveModelCallback('save_models/', top=3, only_param=False, save_on_exception=True)
]
# optimizer = Adam(model.parameters(), lr=0.001)
# optimizer = SWATS(model.parameters(), verbose=True)
optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = LRScheduler(
    LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch)))
callbacks.append(scheduler)

trainer = Trainer(train_data=data.datasets['train'],
                  model=model,
                  optimizer=optimizer,
                  sampler=BucketSampler(batch_size=20),
                  device=1,
                  dev_data=data.datasets['dev'],
                  batch_size=20,
                  metrics=SpanFPreRecMetric(
                      tag_vocab=data.vocabs[Const.TARGET],
                      encoding_type=encoding_type),
                  callbacks=callbacks,
                  num_workers=2,
                  n_epochs=100)
trainer.train()
Example #13
@cache_results('caches/conll2003.pkl', _refresh=False)
def load_data():
    # replace with your data path
    paths = 'data/conll2003'
    data = Conll2003NERPipe(encoding_type=encoding_type).process_from_file(paths)
    return data
data = load_data()
print(data)

embed = BertEmbedding(data.get_vocab(Const.INPUT), model_dir_or_name='en-base-cased',
                        pool_method='max', requires_grad=True, layers='11', include_cls_sep=False, dropout=0.5,
                      word_dropout=0.01)

callbacks = [
                GradientClipCallback(clip_type='norm', clip_value=1),
                WarmupCallback(warmup=0.1, schedule='linear'),
                EvaluateCallback(data.get_dataset('test'))
            ]

model = BertCRF(embed, tag_vocab=data.get_vocab('target'), encoding_type=encoding_type)
optimizer = AdamW(model.parameters(), lr=2e-5)

trainer = Trainer(train_data=data.datasets['train'], model=model, optimizer=optimizer, sampler=BucketSampler(),
                  device=0, dev_data=data.datasets['dev'], batch_size=6,
                  metrics=SpanFPreRecMetric(tag_vocab=data.vocabs[Const.TARGET], encoding_type=encoding_type),
                  loss=None, callbacks=callbacks, num_workers=2, n_epochs=5,
                  check_code_level=0, update_every=3, test_use_tqdm=False)
trainer.train()

Example #14
from fastNLP.core.losses import CMRC2018Loss
from fastNLP.core.metrics import CMRC2018Metric
from fastNLP.io.pipe.qa import CMRC2018BertPipe
from fastNLP import Trainer, BucketSampler
from fastNLP import WarmupCallback, GradientClipCallback
from fastNLP.core.optimizer import AdamW


data_bundle = CMRC2018BertPipe().process_from_file()
data_bundle.rename_field('chars', 'words')

print(data_bundle)

embed = BertEmbedding(data_bundle.get_vocab('words'), model_dir_or_name='cn', requires_grad=True, include_cls_sep=False, auto_truncate=True,
                      dropout=0.5, word_dropout=0.01)
model = BertForQuestionAnswering(embed)
loss = CMRC2018Loss()
metric = CMRC2018Metric()

wm_callback = WarmupCallback(schedule='linear')
gc_callback = GradientClipCallback(clip_value=1, clip_type='norm')
callbacks = [wm_callback, gc_callback]

optimizer = AdamW(model.parameters(), lr=5e-5)

trainer = Trainer(data_bundle.get_dataset('train'), model, loss=loss, optimizer=optimizer,
                  sampler=BucketSampler(seq_len_field_name='context_len'),
                  dev_data=data_bundle.get_dataset('dev'), metrics=metric,
                  callbacks=callbacks, device=0, batch_size=6, num_workers=2, n_epochs=2, print_every=1,
                  test_use_tqdm=False, update_every=10)
trainer.train(load_best_model=False)
Example #15
              bi_embed=bi_embed,bert_embed=bert_embed,
              fc_dropout=fc_dropout,
              pos_embed=pos_embed,
              scale=attn_type == 'transformer')
# model = BERT_TENER(tag_vocab=data_bundle.get_vocab('target'), embed=embed, num_layers=num_layers,
#               d_model=d_model, n_head=n_heads,
#               feedforward_dim=dim_feedforward, dropout=dropout,
#               after_norm=after_norm, attn_type=attn_type,
#               bi_embed=bi_embed, bert_embed=bert_embed,
#               fc_dropout=fc_dropout,
#               pos_embed=pos_embed,
#               scale=attn_type == 'transformer')

optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

callbacks = []
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
evaluate_callback = EvaluateCallback(data_bundle.get_dataset('test'))

if warmup_steps > 0:
    warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
    callbacks.append(warmup_callback)
callbacks.extend([clip_callback, evaluate_callback])

trainer = Trainer(data_bundle.get_dataset('train'), model, optimizer, batch_size=batch_size, sampler=BucketSampler(),
                  num_workers=2, n_epochs=n_epochs, dev_data=data_bundle.get_dataset('dev'),
                  metrics=SpanFPreRecMetric(tag_vocab=data_bundle.get_vocab('target'), encoding_type=encoding_type),
                  dev_batch_size=batch_size, callbacks=callbacks, device=device, test_use_tqdm=False,
                  use_tqdm=True, print_every=300, save_path=None)
trainer.train(load_best_model=False)
Example #16
File: main.py  Project: shepherd233/MCCWS
    for name, para in model.named_parameters():
        if name.find("task_embed") == -1:
            para.requires_grad = False
        else:
            para.requires_grad = True
            print(name)

optimizer = optm.NoamOpt(
    options.d_model, options.factor, 4000,
    torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))

optimizer._step = options.step

best_model_file_name = "{}/model.bin".format(root_dir)

train_sampler = BucketSampler(batch_size=options.batch_size,
                              seq_len_field_name='seq_len')
dev_sampler = SequentialSampler()

i2t = utils.to_id_list(tag_vocab.word2idx)
i2task = utils.to_id_list(task_vocab.word2idx)
dev_set.set_input("ori_words")
test_set.set_input("ori_words")

word_dic = pickle.load(open("dict.pkl", "rb"))


def tester(model, test_batch, write_out=False):
    res = []
    prf = utils.CWSEvaluator(i2t)
    prf_dataset = {}
    oov_dataset = {}
Example #17
bigram_embed_opt = EmbeddingOption(embed_filepath=bigram_embed_path)

data_name = os.path.basename(file_dir)
cache_fp = 'caches/{}.pkl'.format(data_name)

data = prepare_data(_cache_fp=cache_fp, _refresh=False)

model = ShiftRelayCWSModel(char_embed=data.embeddings['chars'],
                           bigram_embed=data.embeddings['bigrams'],
                           hidden_size=hidden_size,
                           num_layers=num_layers,
                           L=L,
                           num_bigram_per_char=1,
                           drop_p=drop_p)

sampler = BucketSampler(batch_size=32)
optimizer = Adam(model.parameters(), lr=lr)
clipper = GradientClipCallback(clip_value=5, clip_type='value')
callbacks = [clipper]
# if pretrain:
#     fixer = FixEmbedding([model.char_embedding, model.bigram_embedding], fix_until=fix_until)
#     callbacks.append(fixer)
trainer = Trainer(data.datasets['train'],
                  model,
                  optimizer=optimizer,
                  loss=None,
                  batch_size=32,
                  sampler=sampler,
                  update_every=5,
                  n_epochs=3,
                  print_every=5,
Example #18
                     hidden_size=1200,
                     num_layers=1,
                     tag_vocab=data.vocabs[Const.TARGET],
                     encoding_type=encoding_type,
                     dropout=dropout)

callbacks = [
    GradientClipCallback(clip_value=5, clip_type='value'),
    EvaluateCallback(data.datasets['test'])
]

optimizer = SGD(model.parameters(), lr=lr, momentum=0.9)
scheduler = LRScheduler(
    LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch)))
callbacks.append(scheduler)

trainer = Trainer(train_data=data.get_dataset('train'),
                  model=model,
                  optimizer=optimizer,
                  sampler=BucketSampler(num_buckets=100),
                  device=0,
                  dev_data=data.get_dataset('dev'),
                  batch_size=batch_size,
                  metrics=SpanFPreRecMetric(
                      tag_vocab=data.vocabs[Const.TARGET],
                      encoding_type=encoding_type),
                  callbacks=callbacks,
                  num_workers=1,
                  n_epochs=100,
                  dev_batch_size=256)
trainer.train()
Example #19
def getdata_proto(task, type, batch=4):
    Task, vocab, ind, index, testset, devset = preprocess(task, type)
    Train = []
    global Test
    for i in range(len(Task)):
        vocab.index_dataset(Task[i], field_name='words', new_field_name='words')
        if i in ind:
            list = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
            rawsupport0 = []
            rawsupport1 = []
            while (len(rawsupport0) == 0 or len(rawsupport1) == 0):
                slice = random.sample(list, 4)
                another = [x for x in list if x not in slice]
                train = Task[i][another]

                for inn in slice:
                    if Task[i][inn]['label'] == -1:
                        rawsupport0.append(inn)
                    else:
                        rawsupport1.append(inn)
            support0 = Task[i][rawsupport0]
            support1 = Task[i][rawsupport1]
        else:
            length = len(Task[i])
            list = [x - 1 for x in range(length)]
            rawsupport0 = []
            rawsupport1 = []
            while (len(rawsupport0) == 0 or len(rawsupport1) == 0):
                slice = random.sample(list, 20)
                another = [x for x in list if x not in slice]
                train = Task[i][another]

                for inn in slice:
                    if Task[i][inn]['label'] == -1:
                        rawsupport0.append(inn)
                    else:
                        rawsupport1.append(inn)

            support0 = Task[i][rawsupport0]
            support1 = Task[i][rawsupport1]
        if i == index:
            Test = Triple(testset, support0, support1)
            Dev = Triple(devset, support0, support1)
        Train.append(Triple(train, support0, support1))

    for i in range(len(Train)):
        Train[i].batch.set_input('words')
        Train[i].support0.set_input('words')
        Train[i].support1.set_input('words')
        Train[i].batch.set_target('onehot')
        Train[i].support0.set_target('onehot')
        Train[i].support1.set_target('onehot')
        Train[i].batch.apply(lambda x: len(x['words']), new_field_name='seq_len')
        Train[i].support0.apply(lambda x: len(x['words']), new_field_name='seq_len')
        Train[i].support1.apply(lambda x: len(x['words']), new_field_name='seq_len')

    Test.batch.set_input('words')
    Test.support0.set_input('words')
    Test.support1.set_input('words')
    Test.batch.set_target('onehot')
    Test.support0.set_target('onehot')
    Test.support1.set_target('onehot')
    Test.batch.apply(lambda x: len(x['words']), new_field_name='seq_len')
    Test.support0.apply(lambda x: len(x['words']), new_field_name='seq_len')
    Test.support1.apply(lambda x: len(x['words']), new_field_name='seq_len')

    Dev.batch.set_input('words')
    Dev.support0.set_input('words')
    Dev.support1.set_input('words')
    Dev.batch.set_target('onehot')
    Dev.support0.set_target('onehot')
    Dev.support1.set_target('onehot')
    Dev.batch.apply(lambda x: len(x['words']), new_field_name='seq_len')
    Dev.support0.apply(lambda x: len(x['words']), new_field_name='seq_len')
    Dev.support1.apply(lambda x: len(x['words']), new_field_name='seq_len')

    Train_batch = []
    for i in range(len(Train)):
        if i in ind:
            sampler = BucketSampler(num_buckets=1, batch_size=batch, seq_len_field_name='seq_len')
            Train_batch.append(Triple(Batch(batch_size=batch, dataset=Train[i].batch, sampler=sampler), Train[i].support0, Train[i].support1))
        else:
            sampler = BucketSampler(batch_size=batch, seq_len_field_name='seq_len')
            Train_batch.append(Triple(Batch(batch_size=batch, dataset=Train[i].batch, sampler=sampler), Train[i].support0, Train[i].support1))

    sampler = BucketSampler(batch_size=batch, seq_len_field_name='seq_len')
    Test_batch = Triple(Batch(batch_size=batch, dataset=Test.batch, sampler=sampler), Test.support0, Test.support1)
    Dev_batch = Triple(Batch(batch_size=batch, dataset=Dev.batch, sampler=sampler), Dev.support0, Dev.support1)
    return Train_batch, Dev_batch, Test_batch, len(vocab)
Example #20
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

callbacks = []
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
evaluate_callback = EvaluateCallback(data_bundle.get_dataset('test'))

if warmup_steps > 0:
    warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
    callbacks.append(warmup_callback)
callbacks.extend([clip_callback, evaluate_callback])

trainer = Trainer(data_bundle.get_dataset('train'),
                  model,
                  optimizer,
                  batch_size=batch_size,
                  sampler=BucketSampler(),
                  num_workers=2,
                  n_epochs=n_epochs,
                  dev_data=data_bundle.get_dataset('dev'),
                  metrics=SpanFPreRecMetric(
                      tag_vocab=data_bundle.get_vocab('target'),
                      encoding_type=encoding_type),
                  dev_batch_size=batch_size,
                  callbacks=callbacks,
                  device=device,
                  test_use_tqdm=False,
                  use_tqdm=True,
                  print_every=300,
                  save_path=None)
trainer.train(load_best_model=False)
Example #21
def main():
    if args.do_eval:
        torch.multiprocessing.set_start_method('spawn', force=True)

    if args.model == 'bert':

        model = BertCRF(embed, [data_bundle.get_vocab('target')],
                        encoding_type='bioes')

    else:
        model = StackedTransformersCRF(
            tag_vocabs=[data_bundle.get_vocab('target')],
            embed=embed,
            num_layers=num_layers,
            d_model=d_model,
            n_head=n_heads,
            feedforward_dim=dim_feedforward,
            dropout=trans_dropout,
            after_norm=after_norm,
            attn_type=attn_type,
            bi_embed=None,
            fc_dropout=fc_dropout,
            pos_embed=pos_embed,
            scale=attn_type == 'transformer')
        model = torch.nn.DataParallel(model)

    if args.do_eval:
        if os.path.exists(os.path.expanduser(args.saved_model)):
            print("Load checkpoint from {}".format(
                os.path.expanduser(args.saved_model)))
            model = torch.load(args.saved_model)
            model.to('cuda')
            print('model to CUDA')

    optimizer = AdamW(model.parameters(), lr=lr, eps=1e-8)

    callbacks = []
    clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
    evaluate_callback = EvaluateCallback(data_bundle.get_dataset('test'))
    checkpoint_callback = CheckPointCallback(os.path.join(
        directory, 'model.pth'),
                                             delete_when_train_finish=False,
                                             recovery_fitlog=True)

    if warmup_steps > 0:
        warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
        callbacks.append(warmup_callback)
    callbacks.extend([clip_callback, checkpoint_callback, evaluate_callback])

    if not args.do_eval:
        trainer = Trainer(data_bundle.get_dataset('train'),
                          model,
                          optimizer,
                          batch_size=batch_size,
                          sampler=BucketSampler(),
                          num_workers=no_cpu,
                          n_epochs=args.n_epochs,
                          dev_data=data_bundle.get_dataset('dev'),
                          metrics=SpanFPreRecMetric(
                              tag_vocab=data_bundle.get_vocab('target'),
                              encoding_type=encoding_type),
                          dev_batch_size=batch_size,
                          callbacks=callbacks,
                          device=args.device,
                          test_use_tqdm=True,
                          use_tqdm=True,
                          print_every=300,
                          save_path=os.path.join(directory, 'best'))

        trainer.train(load_best_model=True)

        predictor = Predictor(model)
        predict(os.path.join(directory, 'predictions_dev.tsv'), data_bundle,
                predictor, 'dev')
        predict(os.path.join(directory, 'predictions_test.tsv'), data_bundle,
                predictor, 'test')

    else:
        print('Predicting')
        # predictions of multiple files
        torch.multiprocessing.freeze_support()
        model.share_memory()
        predictor = Predictor(model)

        if len(files) > multiprocessing.cpu_count():
            with torch.multiprocessing.Pool(processes=no_cpu) as p:
                with tqdm(total=len(files)) as pbar:
                    for i, _ in enumerate(
                            p.imap_unordered(
                                partial(predict,
                                        data_bundle=data_bundle,
                                        predictor=predictor,
                                        predict_on='train',
                                        do_eval=args.do_eval), files)):
                        pbar.update()
        else:
            for file in tqdm(files):
                predict(file, data_bundle, predictor, 'train', args.do_eval)