def train(config):
    vocab = Vocab(config)
    train_data = vocab.get_train_dev_test()
    # Pair-swap augmentation: concatenate each sentence pair in both orders.
    train1 = [(x[0] + ' ' + x[1], x[2]) for x in train_data]
    train2 = [(x[1] + ' ' + x[0], x[2]) for x in train_data]
    train_data = train1 + train2
    train_dataset = BuildDataSet(train_data)
    train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
    train_load = DataLoader(dataset=train_dataset,
                            batch_size=config.batch_size,
                            shuffle=False,  # DistributedSampler handles shuffling
                            collate_fn=collate_fn,
                            sampler=train_sampler)
    for model_name in config.model_name:
        if config.local_rank in [0, -1]:
            msg = 'model_name:{},train_nums:{},train_iter:{},batch_size:{}'
            print(msg.format(model_name, len(train_data), len(train_load),
                             config.batch_size))
        train_process(config, train_load, train_sampler, model_name)
        torch.distributed.barrier()
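def _demo_pair_swap():
    # Toy illustration of the train1/train2 doubling used in train() and
    # k_fold(): every sentence pair is concatenated in both orders under the
    # same label. The rows here are hypothetical; real rows come from
    # Vocab.get_train_dev_test().
    rows = [('how old are you', 'what is your age', 1)]
    train1 = [(a + ' ' + b, y) for a, b, y in rows]  # original order
    train2 = [(b + ' ' + a, y) for a, b, y in rows]  # swapped order
    assert train1 + train2 == [
        ('how old are you what is your age', 1),
        ('what is your age how old are you', 1),
    ]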
def k_fold(config):
    vocab = Vocab(config)
    # vocab.add_words()
    # vocab.build_bert_vocab()
    train, test = vocab.get_train_dev_test()
    test_data = [(x[0] + ' ' + x[1], x[2]) for x in test]
    test_dataset = BuildDataSet(test_data)
    test_load = DataLoader(dataset=test_dataset,
                           batch_size=config.batch_size,
                           shuffle=False,
                           collate_fn=collate_fn)
    # sklearn's KFold rejects random_state when shuffle=False, so shuffle here.
    kf = KFold(n_splits=config.kfold, shuffle=True, random_state=config.seed)
    for k, (train_index, dev_index) in enumerate(kf.split(train)):
        # Plain-list indexing so this works whether train is a list or an array.
        train_data = [train[i] for i in train_index]
        valid_data = [train[i] for i in dev_index]
        # Pair-swap augmentation, as in train() above.
        train1 = [(x[0] + ' ' + x[1], x[2]) for x in train_data]
        train2 = [(x[1] + ' ' + x[0], x[2]) for x in train_data]
        train_data = train1 + train2
        valid_data = [(x[0] + ' ' + x[1], x[2]) for x in valid_data]
        train_dataset = BuildDataSet(train_data)
        train_sampler = torch.utils.data.distributed.DistributedSampler(train_dataset)
        train_load = DataLoader(dataset=train_dataset,
                                batch_size=config.batch_size,
                                shuffle=False,
                                collate_fn=collate_fn,
                                sampler=train_sampler)
        valid_dataset = BuildDataSet(valid_data)
        valid_sampler = torch.utils.data.distributed.DistributedSampler(valid_dataset)
        valid_load = DataLoader(dataset=valid_dataset,
                                batch_size=config.batch_size,
                                shuffle=False,
                                collate_fn=collate_fn,
                                sampler=valid_sampler)
        if config.local_rank in [0, -1]:
            msg = ('{} fold,train_nums:{},train_iter:{},dev_nums:{},dev_iter:{},'
                   'batch_size:{},test_nums:{},test_iter:{}')
            print(msg.format(k + 1, len(train_data), len(train_load),
                             len(valid_data), len(valid_load),
                             config.batch_size, len(test_data), len(test_load)))
        train_process(config, train_load, valid_load, test_load, k, train_sampler)
        torch.distributed.barrier()
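def launch_train():
    # Minimal launch sketch, not necessarily this repo's actual entry point.
    # Assumptions: the script is started via torchrun (which exports the
    # LOCAL_RANK environment variable) and roBerta_Config exposes a writable
    # local_rank field. Both train() and k_fold() above rely on an already
    # initialized process group for DistributedSampler and barrier().
    import os
    local_rank = int(os.environ.get('LOCAL_RANK', '-1'))
    config = roBerta_Config()
    config.local_rank = local_rank
    if local_rank != -1:
        torch.cuda.set_device(local_rank)  # one process per GPU
        torch.distributed.init_process_group(backend='nccl')  # env:// init
    train(config)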
def collate_fn(batch):
    # Pad every field in the batch to the longest sequence. The head of this
    # function is a reconstruction (an assumption): only the attention_mask
    # line below survives in the original fragment, and input_ids and
    # token_type_ids are padded the same way.
    input_ids, token_type_ids, attention_mask, label = [], [], [], []
    max_len = max(len(x['input_ids']) for x, _ in batch)
    for x, y in batch:
        input_ids.append(x['input_ids'] + (max_len - len(x['input_ids'])) * [0])
        token_type_ids.append(x['token_type_ids'] + (max_len - len(x['token_type_ids'])) * [0])
        attention_mask.append(x['attention_mask'] + (max_len - len(x['attention_mask'])) * [0])
        label.append(int(y))
    input_ids = torch.tensor(data=input_ids).type(torch.LongTensor)
    token_type_ids = torch.tensor(data=token_type_ids).type(torch.LongTensor)
    attention_mask = torch.tensor(data=attention_mask).type(torch.LongTensor)
    label = torch.tensor(data=label).type(torch.LongTensor)
    return input_ids, token_type_ids, attention_mask, label


print("***********load test data*****************")
config = roBerta_Config()
vocab = Vocab(config)
train_data, valid_data, test_data = vocab.get_train_dev_test()
test_dataset = BuildDataSet(test_data)
test_load = DataLoader(dataset=test_dataset,
                       batch_size=config.batch_size,
                       shuffle=False,
                       collate_fn=collate_fn)

print("***********load model weight*****************")
model_config = BertConfig.from_pretrained(
    pretrained_model_name_or_path="bert_source/bert_config.json")
model = BertForSequenceClassification(config=model_config)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# map_location keeps the checkpoint loadable on CPU-only machines.
model.load_state_dict(torch.load('save_bert/best_model.pth.tar',
                                 map_location=device))
model = model.to(device)
config.device = device
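# With the weights loaded, a prediction pass over test_load would look roughly
# like the sketch below. Assumptions: predict() is a hypothetical helper, the
# batches come from the collate_fn above, and outputs[0] is the logits tensor,
# which holds for BertForSequenceClassification when no labels are supplied.
def predict(model, test_load, device):
    model.eval()
    predictions = []
    with torch.no_grad():
        for input_ids, token_type_ids, attention_mask, _ in test_load:
            outputs = model(input_ids=input_ids.to(device),
                            token_type_ids=token_type_ids.to(device),
                            attention_mask=attention_mask.to(device))
            logits = outputs[0]
            predictions.extend(logits.argmax(dim=-1).cpu().tolist())
    return predictions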