Example #1
    def __init__(self,
                 pretrained_model,
                 model_path,
                 model_config_path,
                 freeze_deberta=True,
                 max_seq_length=512,
                 batch_size=32,
                 num_labels=2):
        super().__init__()
        if pretrained_model.model_class_name == 'base':
            self.model = DebertaForSequenceClassification.from_pretrained(
                pretrained_model.model_class['pretrain_key'],
                num_labels=num_labels)
        elif pretrained_model.model_class_name == 'xxlarge-v2':
            self.model = DebertaV2ForSequenceClassification.from_pretrained(
                pretrained_model.model_class['pretrain_key'],
                num_labels=num_labels)
        else:
            raise NameError(
                "Currently only supporting 'base' and 'xxlarge-v2' model.")

        self.tokenizer = DebertaTokenizer.from_pretrained(
            pretrained_model.model_class['pretrain_key'])
        self.max_seq_length = max_seq_length
        self.batch_size = batch_size
        self.dataset = NLIDataset_DeBERTa(model_path, pretrained_model,
                                          max_seq_length, batch_size)

        if freeze_deberta:
            # Freeze every model parameter (including the base encoder) so the
            # DeBERTa weights are not updated during training.
            for param in self.model.parameters():
                param.requires_grad = False
            for param in self.model.base_model.parameters():
                param.requires_grad = False
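The constructor above reads a pretrained_model descriptor exposing a model_class_name string and a model_class dict containing a 'pretrain_key'. A minimal, hypothetical sketch of such a descriptor (the wrapper class name and paths below are assumptions, not part of the original code):

from types import SimpleNamespace

# Hypothetical descriptor matching what the __init__ above expects.
pretrained_model = SimpleNamespace(
    model_class_name='base',
    model_class={'pretrain_key': 'microsoft/deberta-base'})

# wrapper = DebertaNLIWrapper(pretrained_model,
#                             model_path='path/to/model_dir',
#                             model_config_path='path/to/config.json')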
Example #2
def train_model(train_dataloader, validation_dataloader, labels):
    model = DebertaForSequenceClassification.from_pretrained('microsoft/deberta-base',
                                                             num_labels=13,
                                                             output_attentions=False,
                                                             output_hidden_states=False)
    model.cuda()
    optimizer = AdamW(model.parameters(), lr=2e-5, eps=1e-6)

    epochs = 4
    total_steps = len(train_dataloader) * epochs
    scheduler = get_linear_schedule_with_warmup(optimizer, num_warmup_steps=0, num_training_steps=total_steps)

    average_losses = []
    for epoch in range(0, epochs):
        total_loss = 0
        model.train()
        for step, batch in enumerate(train_dataloader):
            batch_ids = batch[0].to(device)
            batch_mask = batch[1].to(device)
            batch_labels = batch[2].to(device)
            model.zero_grad()
            outputs = model(batch_ids,
                            token_type_ids=None,
                            attention_mask=batch_mask,
                            labels=batch_labels)
            loss = outputs[0]
            total_loss = total_loss + loss.item()
            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), 1.0)
            optimizer.step()
            scheduler.step()
        # Average training loss for this epoch.
        avg_train_loss = total_loss / len(train_dataloader)
        average_losses.append(avg_train_loss)
        print("Average training loss:", avg_train_loss)

        # Evaluate on the validation set at the end of each epoch.
        model.eval()
        eval_acc = 0
        eval_steps = 0
        for batch in validation_dataloader:
            batch = tuple(t.to(device) for t in batch)
            batch_ids, batch_mask, batch_labels = batch
            with torch.no_grad():
                outputs = model(batch_ids,
                                token_type_ids=None,
                                attention_mask=batch_mask)
            logits = outputs[0].detach().cpu().numpy()
            label_ids = batch_labels.to('cpu').numpy()
            predictions = np.argmax(logits, axis=1).flatten()
            flat_labels = label_ids.flatten()
            temp_eval_acc = np.sum(predictions == flat_labels) / len(flat_labels)
            eval_acc = eval_acc + temp_eval_acc
            eval_steps = eval_steps + 1
        total_acc = eval_acc / eval_steps
        print("  Accuracy:", total_acc)
    torch.save(model, "../DataFiles/bert_model")
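train_model above assumes each batch unpacks as (input_ids, attention_mask, labels), and that device, AdamW, get_linear_schedule_with_warmup, np, torch and a DebertaTokenizer instance named tokenizer are already in scope. A minimal sketch of building a matching DataLoader (the names texts and label_list, the batch size and the max length are assumptions, not taken from the original code):

from torch.utils.data import TensorDataset, DataLoader, RandomSampler

# Hypothetical inputs: texts is a list of strings, label_list a list of ints.
encodings = tokenizer(texts, padding='max_length', truncation=True,
                      max_length=128, return_tensors='pt')
dataset = TensorDataset(encodings['input_ids'],
                        encodings['attention_mask'],
                        torch.tensor(label_list))
train_dataloader = DataLoader(dataset,
                              sampler=RandomSampler(dataset),
                              batch_size=32)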
Example #3
 def __init__(self, model_class):
     if model_class == 'base':
         self.params = self._get_base_params()
         self.model = DebertaForSequenceClassification.from_pretrained(self.params['pretrain_key']).to(device)
     elif model_class == 'xxlarge-v2':
         self.params = self._get_xxlarge_params()
         self.model = DebertaV2ForSequenceClassification.from_pretrained(self.params['pretrain_key']).to(device)
     else:
         raise NameError("Currently only supporting 'base' and 'xxlarge-v2' model.")
     self.tokenizer = DebertaTokenizer.from_pretrained(self.params['pretrain_key'])
     for param in self.model.base_model.parameters():
         param.requires_grad = False
Example #4
 def test_inference_classification_head(self):
     random.seed(0)
     np.random.seed(0)
     torch.manual_seed(0)
     torch.cuda.manual_seed_all(0)
     model = DebertaForSequenceClassification.from_pretrained("microsoft/deberta-base")
     input_ids = torch.tensor([[0, 31414, 232, 328, 740, 1140, 12695, 69, 46078, 1588, 2]])
     output = model(input_ids)[0]
     expected_shape = torch.Size((1, 2))
     self.assertEqual(output.shape, expected_shape)
     expected_tensor = torch.tensor([[0.0884, -0.1047]])
     self.assertTrue(torch.allclose(output, expected_tensor, atol=1e-4), f"{output}")
 def create_and_check_deberta_for_sequence_classification(
     self, config, input_ids, token_type_ids, input_mask, sequence_labels, token_labels, choice_labels
 ):
     config.num_labels = self.num_labels
     model = DebertaForSequenceClassification(config)
     model.to(torch_device)
     model.eval()
     result = model(input_ids, attention_mask=input_mask, token_type_ids=token_type_ids, labels=sequence_labels)
     self.parent.assertListEqual(list(result.logits.size()), [self.batch_size, self.num_labels])
     self.check_loss_output(result)
Example #6
def sequence_classify():
    """
     文件src/transformers/models/deberta/modeling_deberta.py的1169行有一些问题,所以会报错,维度不匹配的错误, 在BERT上是没有此种错误的
     RuntimeError: Index tensor must have the same number of dimensions as input tensor
     labels = torch.gather(labels, 0, label_index.view(-1))
    Returns:

    """
    from transformers import DebertaTokenizer, DebertaForSequenceClassification
    import torch
    tokenizer = DebertaTokenizer.from_pretrained('microsoft/deberta-base')
    model = DebertaForSequenceClassification.from_pretrained(
        'microsoft/deberta-base')
    inputs = tokenizer("Hello, my dog is cute", return_tensors="pt")
    labels = torch.tensor([1]).unsqueeze(0)  # assume the label is 1; a single sample here, so batch size is also 1
    outputs = model(**inputs, labels=labels)
    loss = outputs.loss
    logits = outputs.logits
    print(loss)
    print(logits)
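The error described in the docstring is triggered by the 2-D labels tensor of shape (1, 1): the affected DeBERTa classification head gathers over the labels with a 1-D index. A minimal sketch of the usual workaround, passing 1-D labels of shape (batch_size,) (this is an assumed workaround reusing model and inputs from the function above, not a patch to the library):

# Assumed workaround: keep labels 1-D instead of unsqueezing to (1, 1).
labels = torch.tensor([1])            # shape (1,)
outputs = model(**inputs, labels=labels)
print(outputs.loss, outputs.logits)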
Example #7
def main():
    if len(sys.argv) == 3:
        FOLDER_NAME = sys.argv[1]
        EPOCH = sys.argv[2]
    else:
        print(
            'ERROR : Please insert correct arguments! python evaluate.py <folder_name> <chosen_epoch>'
        )
        return

    # Check GPU Availability
    if torch.cuda.is_available():
        device = torch.device('cuda')
        print('Using GPU!')
    else:
        device = torch.device('cpu')
        print('Using CPU :(')

    # Import Dataset
    print('#### Importing Dataset ####')
    df_val = openData('./snli/dev.jsonl')
    df_test = openData('./snli/test.jsonl')

    # Preprocessing : removing data with label -1
    df_val = removeMinVal(df_val)
    df_test = removeMinVal(df_test)

    BERT_MODEL = 'microsoft/deberta-base'
    # BERT_MODEL = 'roberta-base'
    # BERT_MODEL = 'distilbert-base-uncased'
    # BERT_MODEL = 'bert-base-uncased'
    # BERT_MODEL = 'albert-base-v2'

    tokenizer = DebertaTokenizer.from_pretrained(BERT_MODEL,
                                                 do_lower_case=True)

    print('Encoding validation data')
    encode_val = tokenizer(df_val.premise.tolist(),
                           df_val.hypothesis.tolist(),
                           return_tensors='pt',
                           padding='max_length',
                           truncation=True,
                           max_length=MAX_LENGTH)

    labels_val = torch.tensor(df_val.label.values)

    print('Encoding test data')
    encode_test = tokenizer(df_test.premise.tolist(),
                            df_test.hypothesis.tolist(),
                            return_tensors='pt',
                            padding='max_length',
                            truncation=True,
                            max_length=MAX_LENGTH)

    labels_test = torch.tensor(df_test.label.values)

    dataset_val = TensorDataset(encode_val['input_ids'],
                                encode_val['attention_mask'], labels_val)
    dataset_test = TensorDataset(encode_test['input_ids'],
                                 encode_test['attention_mask'], labels_test)

    model = DebertaForSequenceClassification.from_pretrained(
        BERT_MODEL,
        num_labels=len(label_dict),
        output_attentions=False,
        output_hidden_states=False)

    PARALLEL_GPU = False
    if torch.cuda.device_count() > 1:
        print("Let's use", torch.cuda.device_count(), "GPUs!")
        model = nn.DataParallel(model)
        PARALLEL_GPU = True
    model = model.to(device)

    model.load_state_dict(
        torch.load(
            f'./models/{FOLDER_NAME}/finetuned_model_epoch_{EPOCH}.model',
            map_location=device))

    print('#### Validation Data Result ####')
    dataloader_validation = DataLoader(dataset_val,
                                       sampler=SequentialSampler(dataset_val),
                                       batch_size=batch_size)

    _, predictions, true_vals = evaluate(model, device, dataloader_validation,
                                         PARALLEL_GPU)
    accuracy_per_class(predictions, true_vals)

    print('#### Test Data Result ####')

    dataloader_test = DataLoader(dataset_test,
                                 sampler=SequentialSampler(dataset_test),
                                 batch_size=batch_size)

    _, predictions, true_vals = evaluate(model, device, dataloader_test,
                                         PARALLEL_GPU)
    accuracy_per_class(predictions, true_vals)
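The evaluate and accuracy_per_class helpers are not part of this listing. A minimal sketch of an evaluate consistent with how it is called above, returning average loss, stacked logits and true labels (the body is an assumption, not the original helper):

def evaluate(model, device, dataloader, parallel_gpu=False):
    # Hypothetical re-implementation matching the call sites above.
    model.eval()
    total_loss, all_logits, all_labels = 0.0, [], []
    for batch in dataloader:
        input_ids, attention_mask, labels = (t.to(device) for t in batch)
        with torch.no_grad():
            outputs = model(input_ids, attention_mask=attention_mask, labels=labels)
        # Under DataParallel the per-GPU losses come back as a vector.
        loss = outputs[0].mean() if parallel_gpu else outputs[0]
        total_loss += loss.item()
        all_logits.append(outputs[1].detach().cpu().numpy())
        all_labels.append(labels.cpu().numpy())
    return (total_loss / len(dataloader),
            np.concatenate(all_logits, axis=0),
            np.concatenate(all_labels, axis=0))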
Example #8
def main():

    device_ids=[0]

    init_lr = 1e-5
    max_epochs = 10
    max_length = 512
    batch_size = 1
    gradient_accu = 32 // batch_size

    num_label = 2

    train_mode = False

    prev_acc = 0.
    max_acc = 0.

    config = DebertaConfig.from_pretrained('microsoft/deberta-large')
    tknzr = DebertaTokenizer.from_pretrained('microsoft/deberta-large')
    config.num_labels = num_label

    train_data, test_data = loadData.load_data()
    train_data = train_data + loadData.load_data_aug()

    train_input_ids, train_mask_ids, train_segment_ids, train_label_ids = get_features(train_data, max_length, tknzr)
    test_input_ids, test_mask_ids, test_segment_ids, test_label_ids = get_features(test_data, max_length, tknzr)


    # print(all_input_ids.shape)

    all_input_ids = torch.cat(train_input_ids, dim=0).long()
    all_input_mask_ids = torch.cat(train_mask_ids, dim=0).long()
    all_segment_ids = torch.cat(train_segment_ids, dim=0).long()
    all_label_ids = torch.Tensor(train_label_ids).long()
    train_dataloader = create_dataloader(all_input_ids, all_input_mask_ids, all_segment_ids, all_label_ids,
                                         batch_size=batch_size, train=True)

    all_input_ids = torch.cat(test_input_ids, dim=0).long()
    all_input_mask_ids = torch.cat(test_mask_ids, dim=0).long()
    all_segment_ids = torch.cat(test_segment_ids, dim=0).long()
    all_label_ids = torch.Tensor(test_label_ids).long()
    test_dataloader = create_dataloader(all_input_ids, all_input_mask_ids, all_segment_ids, all_label_ids,
                                        batch_size=batch_size, train=False)

    model = DebertaForSequenceClassification.from_pretrained('microsoft/deberta-large', config=config).cuda(device_ids[0])
    model = torch.nn.DataParallel(model, device_ids=device_ids)


    optimizer = transformers.AdamW(model.parameters(), lr=init_lr, eps=1e-8)
    optimizer.zero_grad()
    #scheduler = transformers.get_constant_schedule_with_warmup(optimizer, len(train_dataloader) // (batch_size * gradient_accu))
    #scheduler = transformers.get_linear_schedule_with_warmup(optimizer, len(train_dataloader) // (batch_size * gradient_accu), (len(train_dataloader) * max_epochs * 2) // (batch_size * gradient_accu), last_epoch=-1)

    if not train_mode:
        max_epochs = 1
        model.load_state_dict(torch.load("../model/model-deberta-1231.ckpt"))
    
    foutput = open("answer-deberta-large-test.txt", "w")

    global_step = 0
    for epoch in range(max_epochs):
        model.train()
        if train_mode:
            loss_avg = 0.
            for step, batch in enumerate(tqdm(train_dataloader, desc="Iteration")):
                global_step += 1
                batch = [t.cuda() for t in batch]
                input_id, input_mask, segment_id, label_id = batch
                loss, _ = model(input_ids=input_id, token_type_ids=segment_id, attention_mask=input_mask, labels=label_id)
                loss = torch.sum(loss)
                loss_avg += loss.item()
                loss = loss / (batch_size * gradient_accu)
                loss.backward()
                if global_step % gradient_accu == 0:
                    optimizer.step()
                    optimizer.zero_grad()
                    #if epoch == 0:
                        #scheduler.step()
            print(loss_avg / len(train_dataloader))

        model.eval()

        final_acc = 0.
        num_test_sample = 0
        tot = [0, 0]
        correct = [0, 0]
        countloop = 0
        for input_id, input_mask, segment_id, label_id in test_dataloader:
            countloop += 1
            input_id = input_id.cuda()
            input_mask = input_mask.cuda()
            segment_id = segment_id.cuda()
            label_id = label_id.cuda()

            with torch.no_grad():
                loss, logit = model(input_ids=input_id, token_type_ids=segment_id, attention_mask=input_mask, labels=label_id)
            logit = logit.detach().cpu().numpy()
            print(logit[0][0], logit[0][1], file=foutput)
            #print(logit)
            label_id = label_id.to('cpu').numpy()
            acc = np.sum(np.argmax(logit, axis=1) == label_id)
            pred = np.argmax(logit, axis=1)
            for i in range(label_id.shape[0]):
                tot[label_id[i]] += 1
                if pred[i] == label_id[i]:
                    correct[label_id[i]] += 1
            final_acc += acc
            num_test_sample += input_id.size(0)

        print("epoch:", epoch)
        print("final acc:", final_acc / num_test_sample)
        if train_mode and final_acc / num_test_sample > max_acc:
            max_acc = final_acc / num_test_sample
            print("save...")
            torch.save(model.state_dict(), "../model/model-deberta-1231.ckpt")
            print("finish")
        print("Max acc:", max_acc)
        '''
        if final_acc / num_test_sample <= prev_acc:
            for param_group in optimizer.param_groups:
                param_group['lr'] = param_group['lr'] * 0.8
        '''
        prev_acc = final_acc / num_test_sample
        tp = correct[1]
        tn = correct[0]
        fp = tot[1] - correct[1]
        fn = tot[0] - correct[0]
        rec = tp / (tp + fn + 1e-5)
        pre = tp / (tp + fp + 1e-5)
        print("recall:{0}, precision:{1}".format(rec, pre))
        print("f:", 2 * pre * rec / (pre + rec))
        print("acc:", (tp + tn) / (tp+tn+fp+fn))