def getdata_matching(task, type, batch=4):
    # Build matching-network style (query, support) pairs per task and wrap them in bucketed batches.
    Task, vocab, ind, index, Devset, Testset = preprocess(task, type)
    Train = []
    global Test
    for i in range(len(Task)):
        vocab.index_dataset(Task[i], field_name='words', new_field_name='words')
        if i in ind:
            candidates = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
            support_ids = random.sample(candidates, 4)
        else:
            candidates = list(range(len(Task[i])))
            support_ids = random.sample(candidates, 20)
        another = [x for x in candidates if x not in support_ids]
        train, support = Task[i][another], Task[i][support_ids]
        if i == index:
            Test = Pair(Testset, support)
            Dev = Pair(Devset, support)
        Train.append(Pair(train, support))

    for i in range(len(Train)):
        Train[i].batch.set_input('words')
        Train[i].support.set_input('words')
        Train[i].batch.set_target('onehot')
        Train[i].support.set_target('onehot')
        Train[i].batch.apply(lambda x: len(x['words']), new_field_name='seq_len')
        Train[i].support.apply(lambda x: len(x['words']), new_field_name='seq_len')

    Test.batch.set_input('words')
    Test.support.set_input('words')
    Test.batch.set_target('onehot')
    Test.support.set_target('onehot')
    Test.batch.apply(lambda x: len(x['words']), new_field_name='seq_len')
    Test.support.apply(lambda x: len(x['words']), new_field_name='seq_len')

    Dev.batch.set_input('words')
    Dev.support.set_input('words')
    Dev.batch.set_target('onehot')
    Dev.support.set_target('onehot')
    Dev.batch.apply(lambda x: len(x['words']), new_field_name='seq_len')
    Dev.support.apply(lambda x: len(x['words']), new_field_name='seq_len')

    Train_batch = []
    for i in range(len(Train)):
        if i in ind:
            sampler = BucketSampler(num_buckets=1, batch_size=batch, seq_len_field_name='seq_len')
        else:
            sampler = BucketSampler(batch_size=batch, seq_len_field_name='seq_len')
        Train_batch.append(Pair(Batch(batch_size=batch, dataset=Train[i].batch, sampler=sampler),
                                Train[i].support))

    sampler = BucketSampler(batch_size=batch, seq_len_field_name='seq_len')
    Test_batch = Pair(Batch(batch_size=batch, dataset=Test.batch, sampler=sampler), Test.support)
    Dev_batch = Pair(Batch(batch_size=batch, dataset=Dev.batch, sampler=sampler), Dev.support)
    return Train_batch, Dev_batch, Test_batch, len(vocab)
def cnn_train(epoch, data, model, batch_size=32):
    device = torch.device("cuda")
    optim = torch.optim.Adam(model.parameters(), lr=0.001)
    lossfunc = nn.CrossEntropyLoss()
    train_sampler = BucketSampler(batch_size=batch_size, seq_len_field_name='seq_len')
    train_batch = Batch(batch_size=batch_size, dataset=data, sampler=train_sampler)
    for i in range(epoch):
        loss_list = []
        cnt = 0
        for batch_x, batch_y in train_batch:
            optim.zero_grad()
            batch_x['words'] = batch_x['words'].long().contiguous().to(device)
            batch_y['target'] = batch_y['target'].long().contiguous().to(device)
            output = model(batch_x['words'])
            loss = lossfunc(output['pred'], batch_y['target'])
            loss.backward()
            optim.step()
            loss_list.append(loss.item())
            info_str = '[info] Epoch {:d} Iteration {:d} Loss : {:.2f}'.format(i, cnt, loss_list[-1])
            print(info_str)
            with open('./cnn_rec.txt', 'a') as fp:
                fp.write(info_str)
                fp.write('\n')
            cnt += 1
        loss_list.clear()
    torch.save(model.state_dict(), './cnn_state.pth')
def cnn_train(epoch, data, model, batch_size=20):
    device = torch.device("cuda")
    optim = torch.optim.Adam(model.parameters(), lr=0.002)
    lossfunc = nn.CrossEntropyLoss()
    train_sampler = BucketSampler(batch_size=batch_size, seq_len_field_name='seq_len')
    train_batch = Batch(batch_size=batch_size, dataset=data, sampler=train_sampler)
    for i in range(epoch):
        loss_list = []
        cnt = 0
        for batch_x, batch_y in train_batch:
            batch_x['words'] = batch_x['words'].long().contiguous().to(device)
            batch_y['target'] = batch_y['target'].long().contiguous().to(device)
            optim.zero_grad()
            output = model(batch_x['words'])
            loss = lossfunc(output['pred'], batch_y['target'])
            loss.backward()
            optim.step()
            loss_list.append(loss.item())
            print('[info] Epoch %d Iteration %d Loss : %f' % (i, cnt, loss_list[-1]))
            cnt += 1
        loss_list.clear()
    torch.save(model.state_dict(), './cnn_state.pth')
def _get_trainer(self, models_folder):
    optimizer = optim.SGD(self.parameters(), lr=self.config['lr'], momentum=0.9)

    callbacks = []
    clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
    evaluate_callback = EvaluateCallback(self.data_bundle.get_dataset('test'))
    if self.config['warmup_steps'] > 0:
        warmup_callback = WarmupCallback(self.config['warmup_steps'], schedule='linear')
        callbacks.append(warmup_callback)
    callbacks.extend([clip_callback, evaluate_callback])

    return Trainer(self.data_bundle.get_dataset('train'), self, optimizer,
                   batch_size=self.config['batch_size'],
                   sampler=BucketSampler(),
                   num_workers=2,
                   n_epochs=100,
                   dev_data=self.data_bundle.get_dataset('dev'),
                   metrics=SpanFPreRecMetric(tag_vocab=self.data_bundle.get_vocab('target'),
                                             encoding_type=self.config['encoding_type']),
                   dev_batch_size=self.config['batch_size'] * 5,
                   callbacks=callbacks,
                   device=self.config['device'],
                   test_use_tqdm=False,
                   use_tqdm=True,
                   print_every=300,
                   save_path=models_folder)
def test_BucketSampler(self):
    sampler = BucketSampler(num_buckets=3, batch_size=16, seq_len_field_name="seq_len")
    data_set = DataSet({"x": [[0] * random.randint(1, 10)] * 10,
                        "y": [[5, 6]] * 10})
    data_set.apply(lambda ins: len(ins["x"]), new_field_name="seq_len")
    indices = sampler(data_set)
    self.assertEqual(len(indices), 10)
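# A hedged, standalone sketch (not from the original code) of what BucketSampler does in the
# snippets above: it groups instances with similar 'seq_len' values into buckets and returns an
# index order in which batches are drawn mostly from a single bucket, keeping per-batch padding
# small. The dataset contents, bucket count, and batch size below are arbitrary illustrative choices.
import random
from fastNLP import DataSet, BucketSampler

toy_set = DataSet({"x": [[0] * random.randint(1, 10) for _ in range(40)],
                   "y": [[5, 6]] * 40})
# BucketSampler reads sequence lengths from this field.
toy_set.apply(lambda ins: len(ins["x"]), new_field_name="seq_len")

sampler = BucketSampler(num_buckets=4, batch_size=8, seq_len_field_name="seq_len")
indices = sampler(toy_set)  # a permutation of range(len(toy_set))

# Indices that sit close together in the returned order tend to have similar 'seq_len' values,
# so each consecutive chunk of batch_size indices forms a batch with little length variation.
for start in range(0, len(indices), 8):
    chunk = indices[start:start + 8]
    print([toy_set[i]["seq_len"] for i in chunk])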
def preprocess(batch=16):
    raw_data1 = []
    raw_data2 = []
    for i in range(len(traindata.data)):
        raw_data1.append(Instance(sentence=traindata.data[i], label=int(traindata.target[i])))
    trainset = DataSet(raw_data1)
    trainset.apply(lambda x: pre(x['sentence']), new_field_name='words')

    for i in range(len(testdata.data)):
        raw_data2.append(Instance(sentence=testdata.data[i], label=int(testdata.target[i])))
    testset = DataSet(raw_data2)
    testset.apply(lambda x: pre(x['sentence']), new_field_name='words')

    global vocab
    vocab = Vocabulary(min_freq=1).from_dataset(trainset, testset, field_name='words')
    vocab.index_dataset(trainset, testset, field_name='words', new_field_name='words')

    trainset.set_input('words')
    testset.set_input('words')
    trainset.apply(lambda x: int(x['label']), new_field_name='target', is_target=True)
    testset.apply(lambda x: int(x['label']), new_field_name='target', is_target=True)
    trainset.apply(lambda x: len(x['words']), new_field_name='seq_len')
    testset.apply(lambda x: len(x['words']), new_field_name='seq_len')

    global vocabsize
    vocabsize = len(vocab)

    sampler = BucketSampler(batch_size=batch, seq_len_field_name='seq_len')
    train_batch = Batch(batch_size=batch, dataset=trainset, sampler=sampler)
    test_batch = Batch(batch_size=batch, dataset=testset, sampler=sampler)
    return train_batch, test_batch, vocabsize
def run_train(config):
    train_dir, model_dir = initial_dir('train', config)
    config.train_path = train_dir
    config.model_path = model_dir
    print_config(config, train_dir)

    datainfo = set_up_data('train', config)
    train_sampler = BucketSampler(batch_size=config.batch_size, seq_len_field_name='enc_len')
    criterion = MyLoss(config=config, padding_idx=datainfo.vocabs["train"].to_index(PAD_TOKEN))

    model = Model(vocab=datainfo.vocabs["train"], config=config)
    params = list(model.encoder.parameters()) + list(model.decoder.parameters()) + \
        list(model.reduce_state.parameters())
    initial_lr = config.lr_coverage if config.is_coverage else config.lr
    optimizer = Adagrad(params, lr=initial_lr, initial_accumulator_value=config.adagrad_init_acc)

    train_loader = datainfo.datasets["train"]
    valid_loader = datainfo.datasets["dev"]
    summary_writer = tf.compat.v1.summary.FileWriter(train_dir)

    trainer = Trainer(model=model, train_data=train_loader, optimizer=optimizer, loss=criterion,
                      batch_size=config.batch_size, check_code_level=-1,
                      n_epochs=config.n_epochs, print_every=100, dev_data=valid_loader,
                      metrics=FastRougeMetric(pred='prediction',
                                              art_oovs='article_oovs',
                                              abstract_sentences='abstract_sentences',
                                              config=config,
                                              vocab=datainfo.vocabs["train"]),
                      metric_key="rouge-l-f", validate_every=-1, save_path=model_dir,
                      callbacks=[TrainCallback(config, summary_writer, patience=10)],
                      use_tqdm=False, device=config.visible_gpu)

    logger.info("-" * 5 + "start training" + "-" * 5)
    traininfo = trainer.train(load_best_model=True)

    logger.info(' | end of Train | time: {:5.2f}s | '.format(traininfo["seconds"]))
    logger.info('[INFO] best eval model in epoch %d and iter %d', traininfo["best_epoch"], traininfo["best_step"])
    logger.info(traininfo["best_eval"])

    bestmodel_save_path = os.path.join(config.model_path, 'bestmodel.pkl')  # this is where checkpoints of best models are saved
    state = {
        'encoder_state_dict': model.encoder.state_dict(),
        'decoder_state_dict': model.decoder.state_dict(),
        'reduce_state_dict': model.reduce_state.state_dict()
    }
    # The model object is passed to the Trainer by reference, so the parameters it updated
    # during training are visible here and can be saved directly.
    torch.save(state, bestmodel_save_path)
    logger.info('[INFO] Saving eval best model to %s', bestmodel_save_path)
metrics.append(LossMetric(loss=Const.LOSS))

optimizer = Adam(model.parameters(), lr=ops.lr, weight_decay=0)
scheduler = LRScheduler(LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch)))
callbacks.append(scheduler)
# callbacks.append(LRScheduler(CosineAnnealingLR(optimizer, 15)))
# optimizer = SWATS(model.parameters(), verbose=True)
# optimizer = Adam(model.parameters(), lr=0.005)

device = 'cuda:0' if torch.cuda.is_available() else 'cpu'

trainer = Trainer(train_data=data.datasets['train'], model=model, optimizer=optimizer,
                  sampler=BucketSampler(num_buckets=50, batch_size=ops.batch_size),
                  device=device, dev_data=data.datasets['dev'], batch_size=ops.batch_size,
                  metrics=metrics, check_code_level=-1, callbacks=callbacks,
                  num_workers=2, n_epochs=ops.num_epochs)
trainer.train()
torch.save(model, 'idcnn.pt')

tester = Tester(data=data.datasets['test'], model=model, metrics=metrics,
'dev':"NER/corpus/CoNLL-2003/eng.testa"} data = Conll2003NERPipe(encoding_type=encoding_type).process_from_file(paths) return data data = load_data() print(data) char_embed = CNNCharEmbedding(vocab=data.get_vocab('words'), embed_size=30, char_emb_size=30, filter_nums=[30], kernel_sizes=[3], word_dropout=0, dropout=0.5) word_embed = StaticEmbedding(vocab=data.get_vocab('words'), model_dir_or_name='en-glove-6b-100d', requires_grad=True, lower=True, word_dropout=0.01, dropout=0.5) word_embed.embedding.weight.data = word_embed.embedding.weight.data/word_embed.embedding.weight.data.std() embed = StackEmbedding([word_embed, char_embed]) model = CNNBiLSTMCRF(embed, hidden_size=200, num_layers=1, tag_vocab=data.vocabs[Const.TARGET], encoding_type=encoding_type) callbacks = [ GradientClipCallback(clip_type='value', clip_value=5), EvaluateCallback(data=data.get_dataset('test')) # 额外对test上的数据进行性能评测 ] optimizer = SGD(model.parameters(), lr=0.008, momentum=0.9) scheduler = LRScheduler(LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch))) callbacks.append(scheduler) trainer = Trainer(train_data=data.get_dataset('train'), model=model, optimizer=optimizer, sampler=BucketSampler(), device=0, dev_data=data.get_dataset('dev'), batch_size=20, metrics=SpanFPreRecMetric(tag_vocab=data.vocabs[Const.TARGET], encoding_type=encoding_type), callbacks=callbacks, num_workers=2, n_epochs=100, dev_batch_size=512) trainer.train()
CWS_dataset = DataSet()
for key in task_list:
    if key.startswith('CWS'):
        for ins in all_data[target][key]:
            CWS_dataset.append(ins)
        del all_data[target][key]
CWS_dataset.set_input('chars', 'target', 'seq_len', 'task_class')
CWS_dataset.set_target('target', 'seq_len')
all_data[target]['CWS-all'] = CWS_dataset

train_data = dict()
train_data['POS-ctb9'] = all_data['train']['POS-ctb9']
train_data['CWS-all'] = all_data['train']['CWS-all']
train_data = MultiTaskIter(all_data['train'], batch_size=batch_size,
                           sampler=BucketSampler(batch_size=batch_size))
# del pos
trainer = Trainer(train_data=train_data, model=model, optimizer=optimizer, device=device,
                  dev_data=all_data['dev']['POS-ctb9'], batch_size=batch_size,
                  metrics=metric3, loss=None, n_epochs=n_epochs, check_code_level=-1,
                  update_every=update_every, test_use_tqdm=True, callbacks=callbacks)
              app_index=char_labels_vocab['APP'],
              pre_chars_embed=pre_chars_embed,
              pre_bigrams_embed=pre_bigrams_embed,
              pre_trigrams_embed=pre_trigrams_embed)

metric1 = SegAppCharParseF1Metric(char_labels_vocab['APP'])
metric2 = CWSMetric(char_labels_vocab['APP'])
metrics = [metric1, metric2]

optimizer = optim.Adam([param for param in model.parameters() if param.requires_grad],
                       lr=lr, weight_decay=weight_decay, betas=[0.9, 0.9])

sampler = BucketSampler(seq_len_field_name='seq_lens')

callbacks = []
# scheduler = LambdaLR(optimizer, lr_lambda=lambda step: (0.75) ** (step // 5000))
scheduler = StepLR(optimizer, step_size=18, gamma=0.75)
# optim_callback = OptimizerCallback(optimizer, scheduler, update_every)
# callbacks.append(optim_callback)
scheduler_callback = LRScheduler(scheduler)
callbacks.append(scheduler_callback)
callbacks.append(GradientClipCallback(clip_type='value', clip_value=5))

tester = Tester(data=data.datasets['test'], model=model, metrics=metrics,
                batch_size=64, device=device, verbose=0)
                     hidden_size=200, num_layers=1,
                     tag_vocab=data.vocabs[Const.TARGET], encoding_type=encoding_type)

callbacks = [
    GradientClipCallback(clip_type='value', clip_value=5),
    FitlogCallback({'test': data.datasets['test']}, verbose=1),
    # SaveModelCallback('save_models/', top=3, only_param=False, save_on_exception=True)
]
# optimizer = Adam(model.parameters(), lr=0.001)
# optimizer = SWATS(model.parameters(), verbose=True)
optimizer = SGD(model.parameters(), lr=0.01, momentum=0.9)
scheduler = LRScheduler(LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch)))
callbacks.append(scheduler)

trainer = Trainer(train_data=data.datasets['train'], model=model, optimizer=optimizer,
                  sampler=BucketSampler(batch_size=20), device=1,
                  dev_data=data.datasets['dev'], batch_size=20,
                  metrics=SpanFPreRecMetric(tag_vocab=data.vocabs[Const.TARGET],
                                            encoding_type=encoding_type),
                  callbacks=callbacks, num_workers=2, n_epochs=100)
trainer.train()
@cache_results('caches/conll2003.pkl', _refresh=False)
def load_data():
    # replace with your local path
    paths = 'data/conll2003'
    data = Conll2003NERPipe(encoding_type=encoding_type).process_from_file(paths)
    return data

data = load_data()
print(data)

embed = BertEmbedding(data.get_vocab(Const.INPUT), model_dir_or_name='en-base-cased',
                      pool_method='max', requires_grad=True, layers='11',
                      include_cls_sep=False, dropout=0.5, word_dropout=0.01)

callbacks = [
    GradientClipCallback(clip_type='norm', clip_value=1),
    WarmupCallback(warmup=0.1, schedule='linear'),
    EvaluateCallback(data.get_dataset('test'))
]

model = BertCRF(embed, tag_vocab=data.get_vocab('target'), encoding_type=encoding_type)
optimizer = AdamW(model.parameters(), lr=2e-5)

trainer = Trainer(train_data=data.datasets['train'], model=model, optimizer=optimizer,
                  sampler=BucketSampler(), device=0, dev_data=data.datasets['dev'],
                  batch_size=6,
                  metrics=SpanFPreRecMetric(tag_vocab=data.vocabs[Const.TARGET],
                                            encoding_type=encoding_type),
                  loss=None, callbacks=callbacks, num_workers=2, n_epochs=5,
                  check_code_level=0, update_every=3, test_use_tqdm=False)
trainer.train()
from fastNLP.core.losses import CMRC2018Loss
from fastNLP.core.metrics import CMRC2018Metric
from fastNLP.io.pipe.qa import CMRC2018BertPipe
from fastNLP import Trainer, BucketSampler
from fastNLP import WarmupCallback, GradientClipCallback
from fastNLP.core.optimizer import AdamW
from fastNLP.embeddings import BertEmbedding
from fastNLP.models import BertForQuestionAnswering

data_bundle = CMRC2018BertPipe().process_from_file()
data_bundle.rename_field('chars', 'words')
print(data_bundle)

embed = BertEmbedding(data_bundle.get_vocab('words'), model_dir_or_name='cn',
                      requires_grad=True, include_cls_sep=False, auto_truncate=True,
                      dropout=0.5, word_dropout=0.01)
model = BertForQuestionAnswering(embed)
loss = CMRC2018Loss()
metric = CMRC2018Metric()

wm_callback = WarmupCallback(schedule='linear')
gc_callback = GradientClipCallback(clip_value=1, clip_type='norm')
callbacks = [wm_callback, gc_callback]

optimizer = AdamW(model.parameters(), lr=5e-5)

trainer = Trainer(data_bundle.get_dataset('train'), model, loss=loss, optimizer=optimizer,
                  sampler=BucketSampler(seq_len_field_name='context_len'),
                  dev_data=data_bundle.get_dataset('dev'), metrics=metric,
                  callbacks=callbacks, device=0, batch_size=6, num_workers=2, n_epochs=2,
                  print_every=1, test_use_tqdm=False, update_every=10)
trainer.train(load_best_model=False)
              bi_embed=bi_embed, bert_embed=bert_embed,
              fc_dropout=fc_dropout,
              pos_embed=pos_embed,
              scale=attn_type == 'transformer')
# model = BERT_TENER(tag_vocab=data_bundle.get_vocab('target'), embed=embed, num_layers=num_layers,
#                    d_model=d_model, n_head=n_heads,
#                    feedforward_dim=dim_feedforward, dropout=dropout,
#                    after_norm=after_norm, attn_type=attn_type,
#                    bi_embed=bi_embed, bert_embed=bert_embed,
#                    fc_dropout=fc_dropout,
#                    pos_embed=pos_embed,
#                    scale=attn_type == 'transformer')

optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

callbacks = []
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
evaluate_callback = EvaluateCallback(data_bundle.get_dataset('test'))
if warmup_steps > 0:
    warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
    callbacks.append(warmup_callback)
callbacks.extend([clip_callback, evaluate_callback])

trainer = Trainer(data_bundle.get_dataset('train'), model, optimizer, batch_size=batch_size,
                  sampler=BucketSampler(), num_workers=2, n_epochs=n_epochs,
                  dev_data=data_bundle.get_dataset('dev'),
                  metrics=SpanFPreRecMetric(tag_vocab=data_bundle.get_vocab('target'),
                                            encoding_type=encoding_type),
                  dev_batch_size=batch_size, callbacks=callbacks, device=device,
                  test_use_tqdm=False, use_tqdm=True, print_every=300, save_path=None)
trainer.train(load_best_model=False)
for name, para in model.named_parameters():
    if name.find("task_embed") == -1:
        para.requires_grad = False
    else:
        para.requires_grad = True
        print(name)

optimizer = optm.NoamOpt(options.d_model, options.factor, 4000,
                         torch.optim.Adam(model.parameters(), lr=0, betas=(0.9, 0.98), eps=1e-9))
optimizer._step = options.step

best_model_file_name = "{}/model.bin".format(root_dir)

train_sampler = BucketSampler(batch_size=options.batch_size, seq_len_field_name='seq_len')
dev_sampler = SequentialSampler()

i2t = utils.to_id_list(tag_vocab.word2idx)
i2task = utils.to_id_list(task_vocab.word2idx)
dev_set.set_input("ori_words")
test_set.set_input("ori_words")
word_dic = pickle.load(open("dict.pkl", "rb"))


def tester(model, test_batch, write_out=False):
    res = []
    prf = utils.CWSEvaluator(i2t)
    prf_dataset = {}
    oov_dataset = {}
bigram_embed_opt = EmbeddingOption(embed_filepath=bigram_embed_path)
data_name = os.path.basename(file_dir)
cache_fp = 'caches/{}.pkl'.format(data_name)

data = prepare_data(_cache_fp=cache_fp, _refresh=False)

model = ShiftRelayCWSModel(char_embed=data.embeddings['chars'],
                           bigram_embed=data.embeddings['bigrams'],
                           hidden_size=hidden_size, num_layers=num_layers,
                           L=L, num_bigram_per_char=1, drop_p=drop_p)

sampler = BucketSampler(batch_size=32)
optimizer = Adam(model.parameters(), lr=lr)
clipper = GradientClipCallback(clip_value=5, clip_type='value')
callbacks = [clipper]
# if pretrain:
#     fixer = FixEmbedding([model.char_embedding, model.bigram_embedding], fix_until=fix_until)
#     callbacks.append(fixer)
trainer = Trainer(data.datasets['train'], model, optimizer=optimizer, loss=None,
                  batch_size=32, sampler=sampler, update_every=5, n_epochs=3, print_every=5,
                     hidden_size=1200, num_layers=1,
                     tag_vocab=data.vocabs[Const.TARGET], encoding_type=encoding_type,
                     dropout=dropout)

callbacks = [
    GradientClipCallback(clip_value=5, clip_type='value'),
    EvaluateCallback(data.datasets['test'])
]

optimizer = SGD(model.parameters(), lr=lr, momentum=0.9)
scheduler = LRScheduler(LambdaLR(optimizer, lr_lambda=lambda epoch: 1 / (1 + 0.05 * epoch)))
callbacks.append(scheduler)

trainer = Trainer(train_data=data.get_dataset('train'), model=model, optimizer=optimizer,
                  sampler=BucketSampler(num_buckets=100), device=0,
                  dev_data=data.get_dataset('dev'), batch_size=batch_size,
                  metrics=SpanFPreRecMetric(tag_vocab=data.vocabs[Const.TARGET],
                                            encoding_type=encoding_type),
                  callbacks=callbacks, num_workers=1, n_epochs=100, dev_batch_size=256)
trainer.train()
def getdata_proto(task, type, batch=4):
    # Build prototypical-network style triples: for every task, a query set plus one support
    # set per class, then wrap everything in bucketed batches.
    Task, vocab, ind, index, testset, devset = preprocess(task, type)
    Train = []
    global Test
    for i in range(len(Task)):
        vocab.index_dataset(Task[i], field_name='words', new_field_name='words')
        if i in ind:
            candidates = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
            support_size = 4
        else:
            candidates = list(range(len(Task[i])))
            support_size = 20
        rawsupport0, rawsupport1 = [], []
        # Resample until the support set contains at least one instance of each class.
        while len(rawsupport0) == 0 or len(rawsupport1) == 0:
            rawsupport0, rawsupport1 = [], []  # start each attempt with empty support lists
            support_ids = random.sample(candidates, support_size)
            another = [x for x in candidates if x not in support_ids]
            train = Task[i][another]
            for inn in support_ids:
                if Task[i][inn]['label'] == -1:
                    rawsupport0.append(inn)
                else:
                    rawsupport1.append(inn)
        support0 = Task[i][rawsupport0]
        support1 = Task[i][rawsupport1]
        if i == index:
            Test = Triple(testset, support0, support1)
            Dev = Triple(devset, support0, support1)
        Train.append(Triple(train, support0, support1))

    for i in range(len(Train)):
        Train[i].batch.set_input('words')
        Train[i].support0.set_input('words')
        Train[i].support1.set_input('words')
        Train[i].batch.set_target('onehot')
        Train[i].support0.set_target('onehot')
        Train[i].support1.set_target('onehot')
        Train[i].batch.apply(lambda x: len(x['words']), new_field_name='seq_len')
        Train[i].support0.apply(lambda x: len(x['words']), new_field_name='seq_len')
        Train[i].support1.apply(lambda x: len(x['words']), new_field_name='seq_len')

    Test.batch.set_input('words')
    Test.support0.set_input('words')
    Test.support1.set_input('words')
    Test.batch.set_target('onehot')
    Test.support0.set_target('onehot')
    Test.support1.set_target('onehot')
    Test.batch.apply(lambda x: len(x['words']), new_field_name='seq_len')
    Test.support0.apply(lambda x: len(x['words']), new_field_name='seq_len')
    Test.support1.apply(lambda x: len(x['words']), new_field_name='seq_len')

    Dev.batch.set_input('words')
    Dev.support0.set_input('words')
    Dev.support1.set_input('words')
    Dev.batch.set_target('onehot')
    Dev.support0.set_target('onehot')
    Dev.support1.set_target('onehot')
    Dev.batch.apply(lambda x: len(x['words']), new_field_name='seq_len')
    Dev.support0.apply(lambda x: len(x['words']), new_field_name='seq_len')
    Dev.support1.apply(lambda x: len(x['words']), new_field_name='seq_len')

    Train_batch = []
    for i in range(len(Train)):
        if i in ind:
            sampler = BucketSampler(num_buckets=1, batch_size=batch, seq_len_field_name='seq_len')
        else:
            sampler = BucketSampler(batch_size=batch, seq_len_field_name='seq_len')
        Train_batch.append(Triple(Batch(batch_size=batch, dataset=Train[i].batch, sampler=sampler),
                                  Train[i].support0, Train[i].support1))

    sampler = BucketSampler(batch_size=batch, seq_len_field_name='seq_len')
    Test_batch = Triple(Batch(batch_size=batch, dataset=Test.batch, sampler=sampler),
                        Test.support0, Test.support1)
    Dev_batch = Triple(Batch(batch_size=batch, dataset=Dev.batch, sampler=sampler),
                       Dev.support0, Dev.support1)
    return Train_batch, Dev_batch, Test_batch, len(vocab)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

callbacks = []
clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
evaluate_callback = EvaluateCallback(data_bundle.get_dataset('test'))
if warmup_steps > 0:
    warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
    callbacks.append(warmup_callback)
callbacks.extend([clip_callback, evaluate_callback])

trainer = Trainer(data_bundle.get_dataset('train'), model, optimizer, batch_size=batch_size,
                  sampler=BucketSampler(), num_workers=2, n_epochs=n_epochs,
                  dev_data=data_bundle.get_dataset('dev'),
                  metrics=SpanFPreRecMetric(tag_vocab=data_bundle.get_vocab('target'),
                                            encoding_type=encoding_type),
                  dev_batch_size=batch_size, callbacks=callbacks, device=device,
                  test_use_tqdm=False, use_tqdm=True, print_every=300, save_path=None)
trainer.train(load_best_model=False)
def main():
    if args.do_eval:
        torch.multiprocessing.set_start_method('spawn', force=True)

    if args.model == 'bert':
        model = BertCRF(embed, [data_bundle.get_vocab('target')], encoding_type='bioes')
    else:
        model = StackedTransformersCRF(tag_vocabs=[data_bundle.get_vocab('target')],
                                       embed=embed, num_layers=num_layers,
                                       d_model=d_model, n_head=n_heads,
                                       feedforward_dim=dim_feedforward, dropout=trans_dropout,
                                       after_norm=after_norm, attn_type=attn_type,
                                       bi_embed=None, fc_dropout=fc_dropout,
                                       pos_embed=pos_embed,
                                       scale=attn_type == 'transformer')
    model = torch.nn.DataParallel(model)

    if args.do_eval:
        if os.path.exists(os.path.expanduser(args.saved_model)):
            print("Load checkpoint from {}".format(os.path.expanduser(args.saved_model)))
            model = torch.load(args.saved_model)
            model.to('cuda')
            print('model to CUDA')

    optimizer = AdamW(model.parameters(), lr=lr, eps=1e-8)

    callbacks = []
    clip_callback = GradientClipCallback(clip_type='value', clip_value=5)
    evaluate_callback = EvaluateCallback(data_bundle.get_dataset('test'))
    checkpoint_callback = CheckPointCallback(os.path.join(directory, 'model.pth'),
                                             delete_when_train_finish=False,
                                             recovery_fitlog=True)
    if warmup_steps > 0:
        warmup_callback = WarmupCallback(warmup_steps, schedule='linear')
        callbacks.append(warmup_callback)
    callbacks.extend([clip_callback, checkpoint_callback, evaluate_callback])

    if not args.do_eval:
        trainer = Trainer(data_bundle.get_dataset('train'), model, optimizer,
                          batch_size=batch_size, sampler=BucketSampler(), num_workers=no_cpu,
                          n_epochs=args.n_epochs, dev_data=data_bundle.get_dataset('dev'),
                          metrics=SpanFPreRecMetric(tag_vocab=data_bundle.get_vocab('target'),
                                                    encoding_type=encoding_type),
                          dev_batch_size=batch_size, callbacks=callbacks, device=args.device,
                          test_use_tqdm=True, use_tqdm=True, print_every=300,
                          save_path=os.path.join(directory, 'best'))
        trainer.train(load_best_model=True)

        predictor = Predictor(model)
        predict(os.path.join(directory, 'predictions_dev.tsv'), data_bundle, predictor, 'dev')
        predict(os.path.join(directory, 'predictions_test.tsv'), data_bundle, predictor, 'test')
    else:
        print('Predicting')
        # predictions for multiple files
        torch.multiprocessing.freeze_support()
        model.share_memory()
        predictor = Predictor(model)
        if len(files) > multiprocessing.cpu_count():
            with torch.multiprocessing.Pool(processes=no_cpu) as p:
                with tqdm(total=len(files)) as pbar:
                    for i, _ in enumerate(
                            p.imap_unordered(
                                partial(predict, data_bundle=data_bundle, predictor=predictor,
                                        predict_on='train', do_eval=args.do_eval), files)):
                        pbar.update()
        else:
            for file in tqdm(files):
                predict(file, data_bundle, predictor, 'train', args.do_eval)