Example #1
class Trainer(object):
    def __init__(self, config):
        self.config = config
        self.data_processor = DataProcessor("/Users/a5560648/workspace/tutor/data", max_len=config["max_len"])
        self.model = BertClassifier(config=config)

    def train(self):
        data_loader = DataLoader(self.data_processor.get_dataset(), batch_size=self.config["batch_size"], shuffle=True, drop_last=True)
        optimizer = torch.optim.Adam(self.model.parameters(), lr=self.config["lr"]) 
        loss_fn = torch.nn.functional.cross_entropy
        for epoch in range(self.config["epoch"]):
            with tqdm(total=len(data_loader)) as pbar:
                for input_ids, token_type_ids, attention_mask, labels in data_loader:
                    optimizer.zero_grad()
                    output = self.model(input_ids, token_type_ids, attention_mask)
                    loss = loss_fn(output, labels)
                    loss.backward()
                    optimizer.step()
                    pbar.update(1)
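The Trainer above calls a BertClassifier that takes input_ids, token_type_ids, and attention_mask and returns class logits. A minimal sketch of such a module, assuming a Hugging Face BertModel backbone and a "num_labels" entry in the config dict (both assumptions; the original class is not shown):

import torch.nn as nn
from transformers import BertModel

class BertClassifier(nn.Module):
    # Hypothetical reconstruction of the classifier used by Trainer above.
    def __init__(self, config):
        super().__init__()
        self.bert = BertModel.from_pretrained("bert-base-uncased")  # assumed backbone
        self.fc = nn.Linear(self.bert.config.hidden_size, config["num_labels"])

    def forward(self, input_ids, token_type_ids, attention_mask):
        outputs = self.bert(input_ids=input_ids,
                            token_type_ids=token_type_ids,
                            attention_mask=attention_mask)
        # classify from the pooled [CLS] representation
        return self.fc(outputs.pooler_output)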
Example #2
def main():
    args = parse_arguments()
    # argument setting
    print("=== Argument Setting ===")
    print("src: " + args.src)
    print("tgt: " + args.tgt)
    print("seed: " + str(args.seed))
    print("train_seed: " + str(args.train_seed))
    print("model_type: " + str(args.model))
    print("max_seq_length: " + str(args.max_seq_length))
    print("batch_size: " + str(args.batch_size))
    print("pre_epochs: " + str(args.pre_epochs))
    print("num_epochs: " + str(args.num_epochs))
    print("AD weight: " + str(args.alpha))
    print("KD weight: " + str(args.beta))
    print("temperature: " + str(args.temperature))
    set_seed(args.train_seed)

    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    print("=== Processing datasets ===")
    if args.src in ['blog', 'airline', 'imdb']:
        src_x, src_y = CSV2Array(
            os.path.join('data', args.src, args.src + '.csv'))
    else:
        src_x, src_y = XML2Array(
            os.path.join('data', args.src, 'negative.review'),
            os.path.join('data', args.src, 'positive.review'))

    src_x, src_test_x, src_y, src_test_y = train_test_split(
        src_x, src_y, test_size=0.2, stratify=src_y, random_state=args.seed)

    if args.tgt in ['blog', 'airline', 'imdb']:
        tgt_x, tgt_y = CSV2Array(
            os.path.join('data', args.tgt, args.tgt + '.csv'))
    else:
        tgt_x, tgt_y = XML2Array(
            os.path.join('data', args.tgt, 'negative.review'),
            os.path.join('data', args.tgt, 'positive.review'))

    tgt_train_x, tgt_test_x, tgt_train_y, tgt_test_y = train_test_split(
        tgt_x, tgt_y, test_size=0.2, stratify=tgt_y, random_state=args.seed)

    if args.model in ['roberta', 'distilroberta']:
        src_features = roberta_convert_examples_to_features(
            src_x, src_y, args.max_seq_length, tokenizer)
        src_test_features = roberta_convert_examples_to_features(
            src_test_x, src_test_y, args.max_seq_length, tokenizer)
        tgt_features = roberta_convert_examples_to_features(
            tgt_x, tgt_y, args.max_seq_length, tokenizer)
        tgt_train_features = roberta_convert_examples_to_features(
            tgt_train_x, tgt_train_y, args.max_seq_length, tokenizer)
    else:
        src_features = convert_examples_to_features(src_x, src_y,
                                                    args.max_seq_length,
                                                    tokenizer)
        src_test_features = convert_examples_to_features(
            src_test_x, src_test_y, args.max_seq_length, tokenizer)
        tgt_features = convert_examples_to_features(tgt_x, tgt_y,
                                                    args.max_seq_length,
                                                    tokenizer)
        tgt_train_features = convert_examples_to_features(
            tgt_train_x, tgt_train_y, args.max_seq_length, tokenizer)

    # load dataset

    src_data_loader = get_data_loader(src_features, args.batch_size)
    src_data_eval_loader = get_data_loader(src_test_features, args.batch_size)
    tgt_data_train_loader = get_data_loader(tgt_train_features,
                                            args.batch_size)
    tgt_data_all_loader = get_data_loader(tgt_features, args.batch_size)

    # load models
    if args.model == 'bert':
        src_encoder = BertEncoder()
        tgt_encoder = BertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'distilbert':
        src_encoder = DistilBertEncoder()
        tgt_encoder = DistilBertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'roberta':
        src_encoder = RobertaEncoder()
        tgt_encoder = RobertaEncoder()
        src_classifier = RobertaClassifier()
    else:
        src_encoder = DistilRobertaEncoder()
        tgt_encoder = DistilRobertaEncoder()
        src_classifier = RobertaClassifier()
    discriminator = Discriminator()

    if args.load:
        src_encoder = init_model(args,
                                 src_encoder,
                                 restore=param.src_encoder_path)
        src_classifier = init_model(args,
                                    src_classifier,
                                    restore=param.src_classifier_path)
        tgt_encoder = init_model(args,
                                 tgt_encoder,
                                 restore=param.tgt_encoder_path)
        discriminator = init_model(args,
                                   discriminator,
                                   restore=param.d_model_path)
    else:
        src_encoder = init_model(args, src_encoder)
        src_classifier = init_model(args, src_classifier)
        tgt_encoder = init_model(args, tgt_encoder)
        discriminator = init_model(args, discriminator)

    # train source model
    print("=== Training classifier for source domain ===")
    if args.pretrain:
        src_encoder, src_classifier = pretrain(args, src_encoder,
                                               src_classifier, src_data_loader)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(src_encoder, src_classifier, src_data_loader)
    evaluate(src_encoder, src_classifier, src_data_eval_loader)
    evaluate(src_encoder, src_classifier, tgt_data_all_loader)

    for params in src_encoder.parameters():
        params.requires_grad = False

    for params in src_classifier.parameters():
        params.requires_grad = False

    # train target encoder by GAN
    print("=== Training encoder for target domain ===")
    if args.adapt:
        tgt_encoder.load_state_dict(src_encoder.state_dict())
        tgt_encoder = adapt(args, src_encoder, tgt_encoder, discriminator,
                            src_classifier, src_data_loader,
                            tgt_data_train_loader, tgt_data_all_loader)

    # eval target encoder on the full target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> source only <<<")
    evaluate(src_encoder, src_classifier, tgt_data_all_loader)
    print(">>> domain adaption <<<")
    evaluate(tgt_encoder, src_classifier, tgt_data_all_loader)
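Helpers such as CSV2Array, convert_examples_to_features, and get_data_loader come from the surrounding project and are not shown. As one illustration, get_data_loader presumably packs the precomputed features into a shuffled DataLoader; a hypothetical sketch (the feature attribute names are assumptions):

import torch
from torch.utils.data import TensorDataset, DataLoader, RandomSampler

def get_data_loader(features, batch_size):
    # Hypothetical helper: wrap precomputed BERT features in a shuffled DataLoader.
    input_ids = torch.tensor([f.input_ids for f in features], dtype=torch.long)
    attention_mask = torch.tensor([f.attention_mask for f in features], dtype=torch.long)
    labels = torch.tensor([f.label_id for f in features], dtype=torch.long)
    dataset = TensorDataset(input_ids, attention_mask, labels)
    return DataLoader(dataset, sampler=RandomSampler(dataset), batch_size=batch_size)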
Example #3
    # model = RNNClassifier(text_field, embedding_dim, hidden_dim, rnn_type="GRU", bidir=False,
    #                      checkpoint_name='checkpoints/gru.pt')
    # in the above line, you can change rnn_type to either RNN_TANH, GRU, or LSTM to create a different network
    # you can also set bidir=True to create a bidirectional network
    # model = CNNClassifier(text_field, embedding_dim, num_filters=32, filter_sizes=[1, 2, 3, 5],
    #                      checkpoint_name='checkpoints/cnn.pt')
    tokenizer = transformers.BertTokenizer.from_pretrained('bert-base-uncased',
                                                           do_lower_case=True)
    train_iter, val_iter, test_iter, text_field, label_field = prep_torch_data(
        batch_size=32, transformer_tokenize=tokenizer)
    bert = transformers.BertModel.from_pretrained('bert-base-uncased')
    for i in bert.parameters():
        i.requires_grad = False
    model = BertClassifier(bert, checkpoint_name='checkpoints/bert.pt')

    optimizer = optim.Adam(model.parameters())
    # `criterion` is used below but not defined in the original snippet; cross-entropy is an assumption
    criterion = torch.nn.CrossEntropyLoss()
    # move everything to gpu if available
    device = ("cuda" if torch.cuda.is_available() else "cpu")
    if device == "cuda":
        model.cuda()
        torch.set_default_tensor_type('torch.cuda.FloatTensor')

    train(model,
          train_iter,
          val_iter,
          test_iter,
          optimizer,
          criterion,
          n_epochs=50,
          short_train=True,
          checkpoint_name=model.checkpoint_name)
Example #4
attention_mask['train'] = attention_mask_[:nb_train]
attention_mask['val'] = attention_mask_[nb_train:nb_train + nb_val]
attention_mask['test'] = attention_mask_[-nb_test:]

datasets = {}
loader = {}
for split in ['train', 'val', 'test']:
    datasets[split] = Data.TensorDataset(input_ids[split],
                                         attention_mask[split], label[split])
    loader[split] = Data.DataLoader(datasets[split],
                                    batch_size=batch_size,
                                    shuffle=True)

# Training

optimizer = th.optim.Adam(model.parameters(), lr=bert_lr)
scheduler = lr_scheduler.MultiStepLR(optimizer, milestones=[30], gamma=0.1)


def train_step(engine, batch):
    global model, optimizer
    model.train()
    model = model.to(gpu)
    optimizer.zero_grad()
    (input_ids, attention_mask, label) = [x.to(gpu) for x in batch]
    y_pred = model(input_ids, attention_mask)
    y_true = label.type(th.long)
    loss = F.cross_entropy(y_pred, y_true)
    loss.backward()
    optimizer.step()
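train_step has the (engine, batch) signature expected by a PyTorch Ignite Engine. A minimal sketch of wiring it up with the loader and scheduler defined above (the epoch count is illustrative, not from the original snippet):

from ignite.engine import Engine, Events

trainer = Engine(train_step)

@trainer.on(Events.EPOCH_COMPLETED)
def step_scheduler(engine):
    # advance the MultiStepLR schedule once per epoch
    scheduler.step()

# the number of epochs is an assumption; the original snippet does not show it
trainer.run(loader['train'], max_epochs=60)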
Example #5
def main():

    # hyperparameters
    batch_size = 4
    device = 'cuda' if torch.cuda.is_available() else 'cpu'
    epochs = 10
    learning_rate = 5e-6  # the learning rate should not be too large

    # build the datasets
    train_dataset = CNewsDataset('data/cnews/cnews.train.txt')
    valid_dataset = CNewsDataset('data/cnews/cnews.val.txt')
    #test_data = load_data('cnews/cnews.test.txt')

    # build the DataLoaders
    train_dataloader = DataLoader(train_dataset,
                                  batch_size=batch_size,
                                  shuffle=True)
    valid_dataloader = DataLoader(valid_dataset,
                                  batch_size=batch_size,
                                  shuffle=False)
    #test_dataloader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)

    # load the BERT config
    bert_config = BertConfig.from_pretrained('bert-base-chinese')
    num_labels = len(train_dataset.labels)

    # initialize the model
    model = BertClassifier(bert_config, num_labels).to(device)

    optimizer = AdamW(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    best_acc = 0

    for epoch in range(1, epochs + 1):
        losses = 0  # cumulative loss
        accuracy = 0  # cumulative accuracy

        model.train()
        train_bar = tqdm(train_dataloader)
        for input_ids, token_type_ids, attention_mask, label_id in train_bar:
            model.zero_grad()
            train_bar.set_description('Epoch %i train' % epoch)

            output = model(
                input_ids=input_ids.to(device),
                attention_mask=attention_mask.to(device),
                token_type_ids=token_type_ids.to(device),
            )

            loss = criterion(output, label_id.to(device))
            losses += loss.item()

            pred_labels = torch.argmax(output, dim=1)  # predicted labels
            acc = torch.sum(pred_labels == label_id.to(device)).item() / len(
                pred_labels)  #acc
            accuracy += acc

            loss.backward()
            optimizer.step()
            train_bar.set_postfix(loss=loss.item(), acc=acc)

        average_loss = losses / len(train_dataloader)
        average_acc = accuracy / len(train_dataloader)

        print('\tTrain ACC:', average_acc, '\tLoss:', average_loss)

        # validation
        model.eval()
        losses = 0  # cumulative loss
        accuracy = 0  # cumulative accuracy
        valid_bar = tqdm(valid_dataloader)
        for input_ids, token_type_ids, attention_mask, label_id in valid_bar:
            valid_bar.set_description('Epoch %i valid' % epoch)
            output = model(
                input_ids=input_ids.to(device),
                attention_mask=attention_mask.to(device),
                token_type_ids=token_type_ids.to(device),
            )

            loss = criterion(output, label_id.to(device))
            losses += loss.item()

            pred_labels = torch.argmax(output, dim=1)  # predicted labels
            acc = torch.sum(pred_labels == label_id.to(device)).item() / len(
                pred_labels)  #acc
            accuracy += acc
            valid_bar.set_postfix(loss=loss.item(), acc=acc)

        average_loss = losses / len(valid_dataloader)
        average_acc = accuracy / len(valid_dataloader)

        print('\tValid ACC:', average_acc, '\tLoss:', average_loss)

        if average_acc > best_acc:
            best_acc = average_acc
            torch.save(model.state_dict(), 'models/best_model.pkl')
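To reuse the checkpoint written above, the saved state dict can be loaded back into a freshly constructed model; a minimal sketch, assuming the same bert_config, num_labels, and device as in main():

model = BertClassifier(bert_config, num_labels)
model.load_state_dict(torch.load('models/best_model.pkl', map_location=device))
model.eval()  # switch to inference mode before prediction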
Example #6
train_dataset = EmojiDataset('../../data/train_bert_sentences.npy',
                             '../../data/train_bert_labels.npy')
train_dataloader = DataLoader(train_dataset,
                              batch_size=64,
                              shuffle=False,
                              collate_fn=collate_fn)

test_dataset = EmojiDataset('../../data/test_bert_sentences.npy',
                            '../../data/test_bert_labels.npy')
test_dataloader = DataLoader(test_dataset,
                             batch_size=128,
                             shuffle=False,
                             collate_fn=collate_fn)
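collate_fn is a project helper that is not shown; since the dataset presumably yields tokenized sentences of varying length, it likely pads each batch. A hypothetical sketch:

import torch
from torch.nn.utils.rnn import pad_sequence

def collate_fn(batch):
    # Hypothetical helper: pad variable-length token id sequences and stack the labels.
    sentences, labels = zip(*batch)
    padded = pad_sequence([torch.as_tensor(s) for s in sentences],
                          batch_first=True, padding_value=0)
    return padded, torch.as_tensor(labels)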

optimizer = AdamW(model.parameters(), lr=2e-5, correct_bias=False)
# optimizer = nn.DataParallel(optimizer)

total_steps = len(train_dataloader) * epochs
# scheduler = get_linear_schedule_with_warmup(
#   optimizer,
#   num_warmup_steps = 0,
#   num_training_steps = total_steps
# )

scheduler = get_constant_schedule_with_warmup(
    optimizer,
    num_warmup_steps=1000,
)

for epoch in range(epochs):
    ...

Example #7
def main(paras):

    logger = logging.getLogger(__name__)
    if paras.save_log_file:
        logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                            datefmt='%m/%d/%Y %H:%M:%S',
                            level=paras.logging_level,
                            filename=f'{paras.log_save_path}/{paras.train_log_file}',
                            filemode='w')
    else:
        logging.basicConfig(format='%(asctime)s - %(levelname)s - %(name)s -   %(message)s',
                            datefmt='%m/%d/%Y %H:%M:%S',
                            level=paras.logging_level, )

    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    logger.info(f'Loading model: {paras.model_name}')
    tokenizer = BertTokenizer.from_pretrained(paras.model_name)
    bert = BertModel.from_pretrained(paras.model_name)


    train_dataset = RE_Dataset(paras, 'train')
    train_dataloader = DataLoader(train_dataset, batch_size=paras.batch_size,
                                  shuffle=paras.shuffle, drop_last=paras.drop_last)
    label_to_index = train_dataset.label_to_index
    special_token_list = list(train_dataset.special_token_set)
    # fixme: add special token to tokenizer
    special_tokens_dict = {'additional_special_tokens': special_token_list}
    tokenizer.add_special_tokens(special_tokens_dict)
    # bert.resize_token_embeddings(len(tokenizer))

    test_dataset = RE_Dataset(paras, 'test')
    test_dataloader = DataLoader(test_dataset, batch_size=paras.batch_size,
                                 shuffle=paras.shuffle, drop_last=paras.drop_last)

    bert_classifier = BertClassifier(bert, paras.hidden_size, paras.label_number,
                                     paras.dropout_prob)

    if paras.optimizer == 'adam':
        logger.info('Loading Adam optimizer.')
        optimizer = torch.optim.Adam(bert_classifier.parameters(), lr=paras.learning_rate)
    elif paras.optimizer == 'adamw':
        logger.info('Loading AdamW optimizer.')
        no_decay = ['bias', 'LayerNorm.weight']
        optimizer_grouped_parameters = [
            {'params': [p for n, p in bert_classifier.named_parameters() if not any(nd in n for nd in no_decay)],
             'weight_decay': 0.01},
            {'params': [p for n, p in bert_classifier.named_parameters() if any(nd in n for nd in no_decay)],
             'weight_decay': 0.0}
        ]
        optimizer = AdamW(optimizer_grouped_parameters, lr=paras.learning_rate,
                          eps=paras.adam_epsilon)
    else:
        logger.warning(f'optimizer must be "Adam" or "AdamW", but got {paras.optimizer}.')
        logger.info('Loading Adam optimizer.')
        optimizer = torch.optim.Adam(bert_classifier.parameters(),
                                     lr=paras.learning_rate)


    logger.info('Training Start.')
    best_eval = {'acc': 0, 'precision': 0, 'recall': 0, 'f1': 0, 'loss': 0}
    for epoch in range(paras.num_train_epochs):
        epoch_loss = 0
        bert_classifier.train()
        for step, batch in enumerate(train_dataloader):
            optimizer.zero_grad()

            batch_data, batch_label = batch

            encoded_data = tokenizer(batch_data,
                                     padding=True,
                                     truncation=True,
                                     return_tensors='pt',
                                     max_length=paras.max_sequence_length)

            label_tensor = batch_label_to_idx(batch_label, label_to_index)

            loss = bert_classifier(encoded_data, label_tensor)

            epoch_loss += loss_to_int(loss)

            logger.info(f'epoch: {epoch}, step: {step}, loss: {loss:.4f}')

            # fixme: del
            # acc, precision, recall, f1 = evaluation(bert_classifier, tokenizer, test_dataloader,
            #                                         paras.max_sequence_length, label_to_index)
            # logger.info(f'Accuracy: {acc:.4f}, Precision: {precision:.4f}, '
            #             f'Recall: {recall:.4f}, F1-score: {f1:.4f}')

            loss.backward()
            optimizer.step()

        epoch_loss = epoch_loss / len(train_dataloader)

        acc, precision, recall, f1 = evaluation(bert_classifier, tokenizer, test_dataloader,
                                                paras.max_sequence_length, label_to_index)

        logger.info(f'Epoch: {epoch}, Epoch-Average Loss: {epoch_loss:.4f}')
        logger.info(f'Accuracy: {acc:.4f}, Precision: {precision:.4f}, '
                    f'Recall: {recall:.4f}, F1-score: {f1:.4f}')

        if best_eval['loss'] == 0 or f1 > best_eval['f1']:
            best_eval['loss'] = epoch_loss
            best_eval['acc'] = acc
            best_eval['precision'] = precision
            best_eval['recall'] = recall
            best_eval['f1'] = f1
            torch.save(bert_classifier, f'{paras.log_save_path}/{paras.model_save_name}')

            with open(f'{paras.log_save_path}/{paras.checkpoint_file}', 'w') as wf:
                wf.write(f'Save time: {time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())}\n')
                wf.write(f'Best F1-score: {best_eval["f1"]:.4f}\n')
                wf.write(f'Precision: {best_eval["precision"]:.4f}\n')
                wf.write(f'Recall: {best_eval["recall"]:.4f}\n')
                wf.write(f'Accuracy: {best_eval["acc"]:.4f}\n')
                wf.write(f'Epoch-Average Loss: {best_eval["loss"]:.4f}\n')

            logger.info(f'Updated model, best F1-score: {best_eval["f1"]:.4f}\n')

    logger.info(f'Train complete, Best F1-score: {best_eval["f1"]:.4f}.')
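batch_label_to_idx and loss_to_int are project helpers that are not shown. A plausible sketch of the former, assuming string labels and the label_to_index mapping built by RE_Dataset:

import torch

def batch_label_to_idx(batch_label, label_to_index):
    # Hypothetical helper: map a batch of string labels to a LongTensor of class indices.
    return torch.tensor([label_to_index[label] for label in batch_label], dtype=torch.long)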
Example #8
def train(dataloader,
          head_trans,
          body_trans,
          classifier,
          load_model=False,
          save_model=True,
          num_epochs=2):

    torch.backends.cudnn.benchmark = True
    # device = 'cuda' if torch.cuda.is_available() else 'cpu'
    device = 'cpu'
    print(device)

    learning_rate = 3e-3

    # For tensorboard
    writer = SummaryWriter('runs/bert')
    step = 0

    # Initialize Model
    model = BertClassifier(head_trans, body_trans, classifier).to(device)

    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=learning_rate)

    if load_model:
        model, optimizer, step = load_checkpoint(
            torch.load('bert_chkpnt/my_checkpoint.pth.tar'), model, optimizer)
        return model

    running_loss = 0.0  # running metrics logged to TensorBoard every 10 batches
    running_accuracy = 0.0
    for epoch in range(num_epochs):
        if save_model:
            checkpoint = {
                'state_dict': model.state_dict(),
                'optimizer': optimizer.state_dict(),
                'step': step
            }
            save_checkpoint(checkpoint)

        loop = tqdm(enumerate(dataloader), total=len(dataloader), leave=False)

        for batch, (head, body, stance) in loop:

            outputs = model(head.to(device), body.to(device))
            loss = criterion(outputs.float(), stance.to(device).long())

            writer.add_scalar('Training Loss', loss.item(), step)
            step += 1

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Update progress bar
            loop.set_description(f'Epoch [{epoch+1}/{num_epochs}]')
            loop.set_postfix(loss=loss.item())

            running_loss += loss.item()
            running_accuracy += (
                (torch.argmax(outputs, dim=1)
                 == stance.to(device)).sum().item()) / BATCH_SIZE
            if (batch + 1) % 10 == 0:
                writer.add_scalar('Running Loss', running_loss / 10,
                                  epoch * len(dataloader) + batch)
                writer.add_scalar('Running Accuracy', running_accuracy / 10,
                                  epoch * len(dataloader) + batch)

                running_loss = 0.0
                running_accuracy = 0

    return model
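save_checkpoint and load_checkpoint are project helpers. Plausible minimal versions that match the checkpoint dict and call sites used above (the file path is an assumption):

import torch

def save_checkpoint(state, filename='bert_chkpnt/my_checkpoint.pth.tar'):
    # Hypothetical helper: persist model/optimizer state and the global step.
    torch.save(state, filename)

def load_checkpoint(checkpoint, model, optimizer):
    # Hypothetical helper: restore what save_checkpoint wrote and return the step counter.
    model.load_state_dict(checkpoint['state_dict'])
    optimizer.load_state_dict(checkpoint['optimizer'])
    return model, optimizer, checkpoint['step']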
Example #9
def main():
    device = torch.device('cuda:3')
    # build the datasets
    print('Loading training data')
    train_data = load_data('dataset/train.csv')
    print('Loading validation data')
    valid_data = load_data('dataset/test.csv')
    # test_data = load_data('cnews/cnews.test.txt')

    batch_size = 16

    # build the DataLoaders
    print('Building batches')
    train_dataloader = DataLoader(train_data,
                                  batch_size=batch_size,
                                  shuffle=True,
                                  num_workers=3)
    valid_dataloader = DataLoader(valid_data,
                                  batch_size=batch_size,
                                  shuffle=False,
                                  num_workers=3)
    # test_dataloader = DataLoader(valid_data, batch_size=batch_size, shuffle=False)

    # load the BERT config
    bert_config = BertConfig.from_pretrained('./chinese_wwm_pytorch')
    bert_config.num_labels = num_labels
    print(bert_config)

    # initialize the model
    model = BertClassifier(bert_config)
    # model.to(device)

    # hyperparameters
    EPOCHS = 20
    learning_rate = 5e-6  # the learning rate should not be too large
    optimizer = AdamW(model.parameters(), lr=learning_rate)
    # use cross-entropy as the loss function
    criterion = nn.CrossEntropyLoss()

    with open('output.txt', 'w') as wf:
        wf.write('Batch Size: ' + str(batch_size) + '\tLearning Rate: ' +
                 str(learning_rate) + '\n')

    best_acc = 0
    # parallel training: DataParallel keeps the parameters on the GPU given by device_ids[0], so it must match the device set above
    net = torch.nn.DataParallel(model, device_ids=[3, 4])
    net.to(device)
    # model.module.avgpool = nn.AdaptiveAvgPool2d(7)
    # start training
    for Epoch in range(1, EPOCHS + 1):
        losses = 0  # cumulative loss
        accuracy = 0  # cumulative accuracy
        print('Epoch:', Epoch)

        model.train()
        for batch_index, batch in enumerate(train_dataloader):
            # print(batch_index)
            # print(batch)
            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            token_type_ids = batch[2].to(device)
            label_ids = batch[3].to(device)
            # feed the three inputs to the model
            output = net(  # forward
                input_ids=input_ids,
                attention_mask=attention_mask,
                token_type_ids=token_type_ids,
            )

            loss = criterion(output, label_ids)
            losses += loss.item()

            pred_labels = torch.argmax(output, dim=1)  # predicted labels
            acc = torch.sum(pred_labels == label_ids.to(device)).item() / len(
                pred_labels)  # acc
            accuracy += acc
            # print per-batch accuracy and loss during training
            # print('Epoch: %d | Train: | Batch: %d / %d | Acc: %f | Loss: %f' % (Epoch, batch_index + 1, len(train_dataloader), acc, loss.item()))
            # zero the gradients, backpropagate the loss, and update the parameters
            model.zero_grad()
            loss.backward()
            optimizer.step()
            # torch.cuda.empty_cache()

        average_loss = losses / len(train_dataloader)
        average_acc = accuracy / len(train_dataloader)
        # print the results of this training epoch
        print('\tTrain ACC:', average_acc, '\tLoss:', average_loss)
        # with open('output.txt', 'a') as rf:
        #     output_to_file = '\nEpoch: ' + str(Epoch) + '\tTrain ACC:' + str(average_acc) + '\tLoss: ' + str(
        #         average_loss)
        #     rf.write(output_to_file)

        # validation
        model.eval()
        losses = 0  # cumulative loss
        accuracy = 0  # cumulative accuracy
        # evaluate on the validation set
        for batch_index, batch in enumerate(valid_dataloader):
            input_ids = batch[0].to(device)
            attention_mask = batch[1].to(device)
            token_type_ids = batch[2].to(device)
            label_ids = batch[3].to(device)
            with torch.no_grad():
                output = model(  # forward
                    input_ids=input_ids,
                    attention_mask=attention_mask,
                    token_type_ids=token_type_ids,
                )
            loss = criterion(output, label_ids)
            losses += loss.item()
            # both operations here act directly on the output tensor
            pred_labels = torch.argmax(output, dim=1)  # predicted labels
            acc = torch.sum(pred_labels == label_ids.to(device)).item() / len(
                pred_labels)  # acc
            accuracy += acc

        average_loss = losses / len(valid_dataloader)
        average_acc = accuracy / len(valid_dataloader)

        print('\tValid ACC:', average_acc, '\tLoss:', average_loss)
        # with open('output.txt', 'a') as rf:
        #     output_to_file = '\nEpoch: ' + str(Epoch) + '\tValid ACC:' + str(average_acc) + '\tLoss: ' + str(
        #         average_loss) + '\n'
        #     rf.write(output_to_file)

        if average_acc > best_acc:
            best_acc = average_acc
            torch.save(model.state_dict(), 'best_model_on_trainset.pkl')
Example #10
def main(args, f):
    # args = parse_arguments()
    set_seed(args.train_seed)
    if args.model in ['roberta', 'distilroberta']:
        tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
    else:
        tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')

    # preprocess data
    src_eval_loader, src_loader, tgt_all_loader, tgt_train_loader = get_all_dataloader(
        args, tokenizer)

    # load models
    if args.model == 'bert':
        src_encoder = BertEncoder()
        tgt_encoder = BertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'distilbert':
        src_encoder = DistilBertEncoder()
        tgt_encoder = DistilBertEncoder()
        src_classifier = BertClassifier()
    elif args.model == 'roberta':
        src_encoder = RobertaEncoder()
        tgt_encoder = RobertaEncoder()
        src_classifier = RobertaClassifier()
    else:
        src_encoder = DistilRobertaEncoder()
        tgt_encoder = DistilRobertaEncoder()
        src_classifier = RobertaClassifier()
    discriminator = Discriminator()

    # parallel models
    if torch.cuda.device_count() > 1:
        print('Let\'s use {} GPUs!'.format(torch.cuda.device_count()))
        src_encoder = nn.DataParallel(src_encoder)
        src_classifier = nn.DataParallel(src_classifier)
        tgt_encoder = nn.DataParallel(tgt_encoder)
        discriminator = nn.DataParallel(discriminator)

    if args.load:
        src_encoder = init_model(args,
                                 src_encoder,
                                 restore_path=param.src_encoder_path)
        src_classifier = init_model(args,
                                    src_classifier,
                                    restore_path=param.src_classifier_path)
        # tgt_encoder = init_model(args, tgt_encoder, restore_path=param.tgt_encoder_path)
        # discriminator = init_model(args, discriminator, restore_path=param.d_model_path)
    else:
        src_encoder = init_model(args, src_encoder)
        src_classifier = init_model(args, src_classifier)

    tgt_encoder = init_model(args, tgt_encoder)
    discriminator = init_model(args, discriminator)

    # train source model
    if args.pretrain:
        print("=== Training classifier for source domain ===")
        src_encoder, src_classifier = pretrain(args, src_encoder,
                                               src_classifier, src_loader)

        # save pretrained model
        save_model(args, src_encoder, param.src_encoder_path)
        save_model(args, src_classifier, param.src_classifier_path)

    # eval source model
    print("=== Evaluating classifier for source domain ===")
    evaluate(args, src_encoder, src_classifier, src_loader)
    src_acc = evaluate(args, src_encoder, src_classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: source-only (no adapt) acc on tgt data: {src_acc}\n')

    for params in src_encoder.parameters():
        params.requires_grad = False

    for params in src_classifier.parameters():
        params.requires_grad = False

    # adapt
    print("=== Adapt tgt encoder ===")
    tgt_encoder.load_state_dict(src_encoder.state_dict())
    if args.src_free:
        s_res_features = src_gmm(args, src_encoder, src_loader)
        src_loader = s_numpy_dataloader(s_res_features, args.batch_size)
        tgt_encoder = aad_adapt_src_free(args, src_encoder, tgt_encoder,
                                         discriminator, src_classifier,
                                         src_loader, tgt_train_loader,
                                         tgt_all_loader)
    else:
        tgt_encoder = aad_adapt(args, src_encoder, tgt_encoder, discriminator,
                                src_classifier, src_loader, tgt_train_loader,
                                tgt_all_loader)

    # save_model(args, tgt_encoder, param.tgt_encoder_path)

    # argument setting
    # print("=== Argument Setting ===")
    print(
        f"model_type: {args.model}; max_seq_len: {args.max_seq_length}; batch_size: {args.batch_size}; "
        f"pre_epochs: {args.pre_epochs}; num_epochs: {args.num_epochs}; adv weight: {args.alpha}; "
        f"KD weight: {args.beta}; temperature: {args.temperature}; src: {args.src}; tgt: {args.tgt}; "
        f'src_free: {args.src_free}; dp: {args.dp}; ent: {args.ent}')

    # eval target encoder on the full target dataset
    print("=== Evaluating classifier for encoded target domain ===")
    print(">>> domain adaption <<<")
    tgt_acc = evaluate(args, tgt_encoder, src_classifier, tgt_all_loader)
    f.write(f'{args.src} -> {args.tgt}: DA acc on tgt data: {tgt_acc}\n')
    f.write(
        f"model_type: {args.model}; batch_size: {args.batch_size}; pre_epochs: {args.pre_epochs}; "
        f"num_epochs: {args.num_epochs}; src_free: {args.src_free}; src: {args.src}; "
        f"tgt: {args.tgt}; dp: {args.dp}; ent: {args.ent}\n\n")