def train(config):

    vocab = Vocab(config)
    train_data = vocab.get_train_dev_test()
    # Augment: each pair is added in both (a + b) and (b + a) sentence order,
    # doubling the training data.
    train1 = [(x[0] + ' ' + x[1], x[2]) for x in train_data]
    train2 = [(x[1] + ' ' + x[0], x[2]) for x in train_data]
    train_data = train1 + train2
    train_dataset = BuildDataSet(train_data)
    train_sampler = torch.utils.data.distributed.DistributedSampler(
        train_dataset)
    train_load = DataLoader(dataset=train_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,
                            collate_fn=collate_fn,
                            sampler=train_sampler)

    for model_name in config.model_name:
        if config.local_rank in [0, -1]:
            msg = 'model_name:{},train_nums:{},train_iter:{},batch_size:{}'
            print(
                msg.format(model_name, len(train_data), len(train_load),
                           config.batch_size))

        train_process(config, train_load, train_sampler, model_name)
        torch.distributed.barrier()
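
Below is a minimal sketch (not part of the original example) of the epoch loop that typically consumes the DistributedSampler built above; set_epoch must be called once per epoch so every process draws a different shuffle of its data shard. The helper name run_epochs, config.num_epochs, and config.device are assumptions, and the model is assumed to be already wrapped in DistributedDataParallel.

def run_epochs(config, train_load, train_sampler, model, optimizer, criterion):
    # Hypothetical epoch loop; model is assumed to be DDP-wrapped already.
    for epoch in range(config.num_epochs):
        train_sampler.set_epoch(epoch)  # reshuffle this rank's shard each epoch
        model.train()
        for input_ids, token_type_ids, attention_mask, label in train_load:
            logits = model(input_ids=input_ids.to(config.device),
                           token_type_ids=token_type_ids.to(config.device),
                           attention_mask=attention_mask.to(config.device))[0]
            loss = criterion(logits, label.to(config.device))
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()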
Example #2
def k_fold(config):

    vocab = Vocab(config)
    # vocab.add_words()
    # vocab.build_bert_vocab()
    train, test = vocab.get_train_dev_test()

    test_data = [(x[0] + ' ' + x[1], x[2]) for x in test]
    test_dataset = BuildDataSet(test_data)
    test_load = DataLoader(dataset=test_dataset,
                           batch_size=config.batch_size,
                           shuffle=False,
                           collate_fn=collate_fn)

    # shuffle must be True for random_state to take effect (sklearn raises otherwise)
    kf = KFold(n_splits=config.kfold, shuffle=True, random_state=config.seed)

    for k, (train_index, dev_index) in enumerate(kf.split(train)):
        #         pdb.set_trace()
        train_data, valid_data = train[train_index], train[dev_index]
        train1 = [(x[0] + ' ' + x[1], x[2]) for x in train_data]
        train2 = [(x[1] + ' ' + x[0], x[2]) for x in train_data]
        train_data = train1 + train2
        valid_data = [(x[0] + ' ' + x[1], x[2]) for x in valid_data]

        train_dataset = BuildDataSet(train_data)

        train_sampler = torch.utils.data.distributed.DistributedSampler(
            train_dataset)
        train_load = DataLoader(dataset=train_dataset,
                                batch_size=config.batch_size,
                                shuffle=False,
                                collate_fn=collate_fn,
                                sampler=train_sampler)

        valid_dataset = BuildDataSet(valid_data)
        valid_sampler = torch.utils.data.distributed.DistributedSampler(
            valid_dataset)
        valid_load = DataLoader(dataset=valid_dataset,
                                batch_size=config.batch_size,
                                shuffle=False,
                                collate_fn=collate_fn,
                                sampler=valid_sampler)

        if config.local_rank in [0, -1]:
            msg = '{} fold,train_nums:{},train_iter:{},dev_nums:{},dev_iter:{},batch_size:{},test_nums:{},test_iter:{}'
            print(
                msg.format(k + 1, len(train_data), len(train_load),
                           len(valid_data), len(valid_load), config.batch_size,
                           len(test_data), len(test_load)))

        train_process(config, train_load, valid_load, test_load, k,
                      train_sampler)
        torch.distributed.barrier()
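
Note that kf.split accepts any sequence, but the fancy indexing train[train_index] above requires an array-like; a small standalone illustration of that assumption with made-up rows:

import numpy as np
from sklearn.model_selection import KFold

# Toy (text_a, text_b, label) rows, invented purely for illustration.
rows = np.array([('q1', 'a1', 0), ('q2', 'a2', 1),
                 ('q3', 'a3', 0), ('q4', 'a4', 1)], dtype=object)
kf = KFold(n_splits=2, shuffle=True, random_state=42)
for fold, (train_index, dev_index) in enumerate(kf.split(rows)):
    train_rows, dev_rows = rows[train_index], rows[dev_index]
    print(fold, len(train_rows), len(dev_rows))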
Example #3
def collate_fn(batch_data):
    # Note: the head of this function is reconstructed; the padding of
    # input_ids and token_type_ids is assumed to mirror attention_mask below
    # (every field is padded with 0 to the longest sequence in the batch).
    input_ids, token_type_ids, attention_mask, label = [], [], [], []
    max_len = max(len(x['input_ids']) for x, _ in batch_data)
    for x, y in batch_data:
        input_ids.append(x['input_ids'] + (max_len - len(x['input_ids'])) * [0])
        token_type_ids.append(x['token_type_ids'] +
                              (max_len - len(x['token_type_ids'])) * [0])
        attention_mask.append(x['attention_mask'] +
                              (max_len - len(x['attention_mask'])) * [0])
        label.append(int(y))

    input_ids = torch.tensor(data=input_ids).type(torch.LongTensor)
    token_type_ids = torch.tensor(data=token_type_ids).type(torch.LongTensor)
    attention_mask = torch.tensor(data=attention_mask).type(torch.LongTensor)
    label = torch.tensor(data=label).type(torch.LongTensor)
    return input_ids, token_type_ids, attention_mask, label
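
A quick hypothetical check of the input format collate_fn expects, inferred from the padding code above: a list of (encoding, label) pairs where each encoding holds Python lists for input_ids, token_type_ids and attention_mask. The bert-base-chinese checkpoint name is an assumption.

from transformers import BertTokenizer

tokenizer = BertTokenizer.from_pretrained('bert-base-chinese')  # assumed checkpoint
enc_a = tokenizer('first text', 'second text')                  # lists, not tensors
enc_b = tokenizer('a longer first text here', 'second text')
batch = [(enc_a, 0), (enc_b, 1)]
input_ids, token_type_ids, attention_mask, label = collate_fn(batch)
print(input_ids.shape, label)  # both rows padded to the same length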


print("***********load test data*****************")

config = roBerta_Config()
vocab = Vocab(config)
train_data, valid_data, test_data = vocab.get_train_dev_test()
test_dataset = BuildDataSet(test_data)
test_load = DataLoader(dataset=test_dataset,
                       batch_size=config.batch_size,
                       shuffle=False,
                       collate_fn=collate_fn)

print("***********load model weight*****************")

model_config = BertConfig.from_pretrained(
    pretrained_model_name_or_path="bert_source/bert_config.json")
model = BertForSequenceClassification(config=model_config)
# load on CPU first so the checkpoint also works on a machine without a GPU
state_dict = torch.load('save_bert/best_model.pth.tar', map_location='cpu')
model.load_state_dict(state_dict)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)
config.device = device
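
The example stops after moving the model to the device; a minimal inference sketch (not in the original) that feeds the test DataLoader through the loaded classifier and collects argmax predictions:

print("***********predict*****************")

model.eval()
predictions = []
with torch.no_grad():
    for input_ids, token_type_ids, attention_mask, label in test_load:
        outputs = model(input_ids=input_ids.to(device),
                        token_type_ids=token_type_ids.to(device),
                        attention_mask=attention_mask.to(device))
        logits = outputs[0]  # BertForSequenceClassification returns logits first
        predictions.extend(logits.argmax(dim=-1).cpu().tolist())
print(len(predictions), 'test predictions')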