def test_CrossEntropyLoss(self):
    # 1-D class-index targets: result should match F.cross_entropy exactly
    ce = loss.CrossEntropyLoss(pred="my_predict", target="my_truth")
    a = torch.randn(3, 5, requires_grad=False)
    b = torch.empty(3, dtype=torch.long).random_(5)
    ans = ce({"my_predict": a}, {"my_truth": b})
    self.assertEqual(ans, torch.nn.functional.cross_entropy(a, b))

    # class dimension at dim 1, the layout F.cross_entropy expects natively
    ce = loss.CrossEntropyLoss(pred="my_predict", target="my_truth", class_in_dim=1)
    a = torch.randn(3, 4, 3)
    b = torch.randint(3, (3, 3))
    ans = ce({"my_predict": a}, {"my_truth": b})
    self.assertAlmostEqual(ans.item(),
                           torch.nn.functional.cross_entropy(a, b).item(),
                           places=4)

    # class dimension at dim 2: the loss should transpose before computing
    ce = loss.CrossEntropyLoss(pred="my_predict", target="my_truth", class_in_dim=2)
    a = torch.randn(3, 4, 3)
    b = torch.randint(3, (3, 4))
    ans = ce({"my_predict": a}, {"my_truth": b})
    self.assertAlmostEqual(ans.item(),
                           torch.nn.functional.cross_entropy(a.transpose(1, 2), b).item(),
                           places=4)
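# A minimal standalone sketch (not part of the test suite) of what class_in_dim
# works around: torch.nn.functional.cross_entropy expects the class dimension
# at dim 1, so a (batch, seq_len, n_classes) prediction must be transposed
# before the loss is computed.
import torch
import torch.nn.functional as F

def seq_cross_entropy(pred, target):
    # pred: (batch, seq_len, n_classes), target: (batch, seq_len)
    # transpose puts classes at dim 1, the layout F.cross_entropy expects
    return F.cross_entropy(pred.transpose(1, 2), target)

pred = torch.randn(3, 4, 5)        # batch=3, seq_len=4, 5 classes
target = torch.randint(5, (3, 4))  # one class index per token
assert torch.allclose(
    seq_cross_entropy(pred, target),
    F.cross_entropy(pred.reshape(-1, 5), target.reshape(-1)))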
def test_losser3(self):
    # (3) pred/target sizes match; the extra 'stop_fast_param' key in
    # pred_dict bypasses the fast parameter mapping, exercising the full
    # check path instead
    pred_dict = {"pred": torch.zeros(16, 3), 'stop_fast_param': 0}
    target_dict = {'target': torch.zeros(16).long()}
    los = loss.CrossEntropyLoss()
    print(los(pred_dict=pred_dict, target_dict=target_dict))
def test_losser1(self):
    # (1) only pred and target are passed, using the default key names
    pred_dict = {"pred": torch.zeros(4, 3)}
    target_dict = {'target': torch.zeros(4).long()}
    los = loss.CrossEntropyLoss()
    print(los(pred_dict=pred_dict, target_dict=target_dict))
def test_losser2(self):
    # (2) with corrupted size: a 2-D (16, 3) target does not match the
    # expected (16,) class-index shape, so a RuntimeError is expected
    pred_dict = {"pred": torch.zeros(16, 3)}
    target_dict = {'target': torch.zeros(16, 3).long()}
    los = loss.CrossEntropyLoss()
    with self.assertRaises(RuntimeError):
        print(los(pred_dict=pred_dict, target_dict=target_dict))
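# The same failure mode, reproduced directly against F.cross_entropy as a
# standalone sketch (not part of the test class): a 2-D long target matches
# neither the class-index layout nor a floating-point probability target.
# The exact message varies across PyTorch versions, but it is a RuntimeError
# either way.
import torch
import torch.nn.functional as F

try:
    F.cross_entropy(torch.zeros(16, 3), torch.zeros(16, 3).long())
except RuntimeError as e:
    print('raised as expected:', e)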
def train():
    seed = set_rng_seeds(1234)
    print('RNG SEED {}'.format(seed))
    print('loading data')
    ds_list, word_v, tag_v = g_datasets['{}-{}'.format(g_args.ds, g_args.task)]()
    print(ds_list[0][:2])
    embed = load_pretrain_emb(word_v, lang='zh' if g_args.ds == 'ctb' else 'en')
    g_model_cfg['num_cls'] = len(tag_v)
    print(g_model_cfg)
    g_model_cfg['init_embed'] = embed
    model = g_model_select[g_args.task.lower()](**g_model_cfg)

    def init_model(model):
        # re-initialize every parameter except the pretrained embedding
        # matrix, the only one whose first dim equals the vocabulary size
        for p in model.parameters():
            if p.size(0) != len(word_v):
                nn.init.normal_(p, 0.0, 0.05)
    init_model(model)

    # note the ordering: ds_list[2] is used for validation, ds_list[1] for testing
    train_data = ds_list[0]
    dev_data = ds_list[2]
    test_data = ds_list[1]
    print(tag_v.word2idx)

    # sequence-labeling tasks ignore the padding tag in the loss;
    # -100 is PyTorch's default ignore_index for the other tasks
    if g_args.task in ['pos', 'ner']:
        padding_idx = tag_v.padding_idx
    else:
        padding_idx = -100
    print('padding_idx ', padding_idx)
    loss = FN.CrossEntropyLoss(padding_idx=padding_idx)

    metrics = {
        'pos': (None, FN.AccuracyMetric()),
        'ner': ('f', FN.core.metrics.SpanFPreRecMetric(
            tag_vocab=tag_v, encoding_type='bmeso', ignore_labels=[''])),
        'cls': (None, FN.AccuracyMetric()),
        'nli': (None, FN.AccuracyMetric()),
    }
    metric_key, metric = metrics[g_args.task]
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # the pretrained embedding is trained with a 10x smaller learning rate
    ex_param = [x for x in model.parameters()
                if x.requires_grad and x.size(0) != len(word_v)]
    optim_cfg = [
        {'params': model.enc.embedding.parameters(), 'lr': g_args.lr * 0.1},
        {'params': ex_param, 'lr': g_args.lr, 'weight_decay': g_args.w_decay},
    ]

    trainer = FN.Trainer(model=model, train_data=train_data, dev_data=dev_data,
                         loss=loss, metrics=metric, metric_key=metric_key,
                         optimizer=torch.optim.Adam(optim_cfg),
                         n_epochs=g_args.ep, batch_size=g_args.bsz,
                         print_every=10, validate_every=3000,
                         device=device, use_tqdm=False, prefetch=False,
                         save_path=g_args.log, callbacks=[MyCallback()])
    trainer.train()

    tester = FN.Tester(data=test_data, model=model, metrics=metric,
                       batch_size=128, device=device)
    tester.test()
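# padding_idx presumably maps onto PyTorch's ignore_index: padded target
# positions contribute nothing to the loss. A quick standalone check of
# that behavior:
import torch
import torch.nn.functional as F

logits = torch.randn(4, 3)
target = torch.tensor([0, 2, -100, -100])  # last two positions are padding
masked = F.cross_entropy(logits, target, ignore_index=-100)
manual = F.cross_entropy(logits[:2], target[:2])  # mean over real tokens only
assert torch.allclose(masked, manual)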
def train():
    print('loading data')
    ds_list, word_v, tag_v = g_datasets['{}-{}'.format(g_args.ds, g_args.task)]()
    print(ds_list[0][:2])
    print(len(ds_list[0]), len(ds_list[1]), len(ds_list[2]))
    embed = load_pretrain_emb(word_v, lang='zh' if g_args.ds == 'ctb' else 'en')
    g_model_cfg['num_cls'] = len(tag_v)
    print(g_model_cfg)
    g_model_cfg['init_embed'] = embed
    model = g_model_select[g_args.task.lower()](**g_model_cfg)

    def init_model(model):
        # re-initialize everything except the pretrained embedding matrix;
        # biases and other 1-D parameters start at zero
        for p in model.parameters():
            if p.size(0) != len(word_v):
                if len(p.size()) < 2:
                    nn.init.constant_(p, 0.0)
                else:
                    nn.init.normal_(p, 0.0, 0.05)
    init_model(model)

    train_data = ds_list[0]
    dev_data = ds_list[1]
    test_data = ds_list[2]
    print(tag_v.word2idx)

    # sequence-labeling tasks ignore the padding tag in the loss;
    # -100 is PyTorch's default ignore_index for the other tasks
    if g_args.task in ['pos', 'ner']:
        padding_idx = tag_v.padding_idx
    else:
        padding_idx = -100
    print('padding_idx ', padding_idx)
    loss = FN.CrossEntropyLoss(padding_idx=padding_idx)

    metrics = {
        'pos': (None, FN.AccuracyMetric()),
        'ner': ('f', FN.core.metrics.SpanFPreRecMetric(
            tag_vocab=tag_v, encoding_type='bmeso', ignore_labels=[''])),
        'cls': (None, FN.AccuracyMetric()),
        'nli': (None, FN.AccuracyMetric()),
    }
    metric_key, metric = metrics[g_args.task]
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # exclude the embedding matrix; apply weight decay only to parameters
    # that are not biases or LayerNorm weights
    params = [(x, y) for x, y in list(model.named_parameters())
              if y.requires_grad and y.size(0) != len(word_v)]
    no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
    print([n for n, p in params])
    optim_cfg = [
        # {'params': model.enc.embedding.parameters(), 'lr': g_args.lr * 0.1},
        {'params': [p for n, p in params if not any(nd in n for nd in no_decay)],
         'lr': g_args.lr, 'weight_decay': 1.0 * g_args.w_decay},
        {'params': [p for n, p in params if any(nd in n for nd in no_decay)],
         'lr': g_args.lr, 'weight_decay': 0.0 * g_args.w_decay},
    ]
    print(model)

    trainer = FN.Trainer(model=model, train_data=train_data, dev_data=dev_data,
                         loss=loss, metrics=metric, metric_key=metric_key,
                         optimizer=torch.optim.Adam(optim_cfg),
                         n_epochs=g_args.ep, batch_size=g_args.bsz,
                         print_every=100, validate_every=1000,
                         device=device, use_tqdm=False, prefetch=False,
                         save_path=g_args.log,
                         sampler=FN.BucketSampler(100, g_args.bsz, C.INPUT_LEN),
                         callbacks=[MyCallback()])
    print(trainer.train())

    tester = FN.Tester(data=test_data, model=model, metrics=metric,
                       batch_size=128, device=device)
    print(tester.test())
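# A toy check (hypothetical module, not from this repo) of the no_decay
# grouping above. Note the substring match on 'LayerNorm.weight' relies on
# the submodule attribute being literally named 'LayerNorm', as in
# BERT-style models.
import torch.nn as nn

class Toy(nn.Module):
    def __init__(self):
        super().__init__()
        self.linear = nn.Linear(4, 4)
        self.LayerNorm = nn.LayerNorm(4)

toy = Toy()
no_decay = ['bias', 'LayerNorm.bias', 'LayerNorm.weight']
decay = [n for n, p in toy.named_parameters()
         if not any(nd in n for nd in no_decay)]
skip = [n for n, p in toy.named_parameters()
        if any(nd in n for nd in no_decay)]
print(decay)  # ['linear.weight']
print(skip)   # ['linear.bias', 'LayerNorm.weight', 'LayerNorm.bias']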
def RUN(data, label, split, modelfunc, classnum=2, epochs=10):
    assert len(data) == len(label)
    if split is None:
        dataset = fastNLP.DataSet({'raw_sentence': data, 'label_str': label})
    else:
        dataset = fastNLP.DataSet({
            'raw_sentence': data,
            'label_str': label,
            'split': split,
        })
    dataset.drop(lambda x: len(x['raw_sentence']) == 0)
    dataset.apply(lambda x: int(float(x['label_str'])),
                  new_field_name='label', is_target=True)
    dataset.apply(lambda x: x['raw_sentence'].split(), new_field_name='word_str')

    vocab = fastNLP.Vocabulary(min_freq=1)
    dataset.apply(lambda x: [vocab.add(word) for word in x['word_str']])

    if split is None:
        # no predefined split: hold out 10% for evaluation
        traindata, testdata = dataset.split(0.1)
    else:
        traindata = dataset[:]
        testdata = dataset[:]
        traindata.drop(lambda x: x['split'] != 'train')
        testdata.drop(lambda x: x['split'] != 'test')

    traindata.apply(lambda x: [vocab.to_index(word) for word in x['word_str']],
                    new_field_name='word_seq', is_input=True)
    testdata.apply(lambda x: [vocab.to_index(word) for word in x['word_str']],
                   new_field_name='word_seq', is_input=True)

    model = modelfunc(embed_num=len(vocab), embed_dim=100, num_classes=classnum,
                      kernel_nums=(3, 4, 5), kernel_sizes=(3, 4, 5),
                      padding=0, dropout=0)
    # override the embedding dropout that was set to 0 above
    model.embed.dropout = torch.nn.Dropout(0.5)

    # initialize the embedding matrix from GloVe where a vector exists,
    # keeping the random initialization otherwise
    gloveemb = np.random.rand(len(vocab), 100)
    for i in range(len(vocab)):
        word = vocab.to_word(i)
        try:
            gloveemb[i, :] = glove[word]
        except KeyError:
            pass
    model.addembed(gloveemb)

    trainer = fastNLP.Trainer(model=model, train_data=traindata, dev_data=testdata,
                              loss=fastNLP.CrossEntropyLoss(),
                              metrics=fastNLP.AccuracyMetric(),
                              use_cuda=True, n_epochs=epochs,
                              check_code_level=-1)
    trainer.train()
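# 'glove' above is a module-level lookup table that is not defined in this
# function. A minimal sketch of building it from a standard GloVe text file
# (the file path is an assumption):
import numpy as np

def load_glove(path='glove.6B.100d.txt'):
    emb = {}
    with open(path, encoding='utf-8') as f:
        for line in f:
            parts = line.rstrip().split(' ')
            emb[parts[0]] = np.asarray(parts[1:], dtype=np.float32)
    return emb

# glove = load_glove()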