# Example #1
## Tokenize and padding
# Pretrained uncased BERT-base (12 layers, 768 hidden units, 12 attention
# heads); the directory holds the vocab used by the tokenizer.
BERT_MODEL_PATH = '../input/bert-pretrained-models/uncased_l-12_h-768_a-12/uncased_L-12_H-768_A-12/'
tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH)
# convert_lines presumably tokenizes each text and pads/truncates to MAX_LEN
# token ids — TODO confirm against its definition (not visible here).
x_test = convert_lines(x_test,MAX_LEN,tokenizer)


# NOTE(review): the entire test matrix is moved to the GPU up front, so the
# whole dataset resides in GPU memory for the lifetime of the loader.
x_test_cuda = torch.tensor(x_test, dtype=torch.long).cuda()
test_data = torch.utils.data.TensorDataset(x_test_cuda)
# shuffle=False keeps predictions aligned with the original row order.
test_loader = torch.utils.data.DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False)

## load fine-tuned model
# Reuse BERT_MODEL_PATH (defined above, ends with '/') instead of duplicating
# the long path literal — the resulting config path is identical.
bert_config = BertConfig(BERT_MODEL_PATH + 'bert_config.json')
net = BertForSequenceClassification(bert_config, num_labels=6)
# map_location='cpu' makes loading robust to checkpoints saved on a different
# CUDA device; the model is moved to the current GPU right afterwards anyway.
net.load_state_dict(torch.load("../input/bert-model3/bert_pytorch_v3.pt",
                               map_location='cpu'))
net.cuda()

## inference
net.eval()
with torch.no_grad():
    # Sigmoid score of output column 0 for every batch, in loader order.
    batch_scores = [
        torch.sigmoid(net(x_batch).cpu()).numpy()[:, 0]
        for (x_batch,) in test_loader
    ]
result_1 = np.concatenate(batch_scores) if batch_scores else np.array([])



net = BertForSequenceClassification(bert_config,num_labels=6)
net.load_state_dict(torch.load("../input/bert-model4/bert_pytorch_v4.pt"))
validate = True

# Per-fold prediction/feature extraction over the training set.
# NOTE(review): the inner `for x in tqdm_notebook(...)` body continues past
# the end of this excerpt and is not visible here.
for fold in [
        1,
]:
    print('Fold{}:'.format(fold))

    # kfold presumably maps fold -> (train_indices, validate_indices) — TODO
    # confirm against where `kfold` is constructed.
    validate_idx = kfold[fold][1]
    train_idx = kfold[fold][0]
    #     train_idx = list(range(nrows))[:int(nrows*0.8)]
    #     validate_idx = list(range(nrows))[int(nrows*0.8):]

    # Load this fold's fine-tuned weights and freeze the model: inference
    # only, so gradients are disabled for every parameter.
    model.load_state_dict(
        torch.load(os.path.join(models_path, 'bert_fold{}.bin'.format(fold))))
    model.cuda()
    model.eval()
    for param in model.parameters():
        param.requires_grad = False

    # Accumulators for this fold's predictions and extracted features.
    train_pred_fold = []
    test_pred_fold = []
    train_feature_fold = []
    test_feature_fold = []
    # on train_set
    # shuffle=False keeps extracted features aligned with x_train row order.
    train_dataset = TensorDataset(torch.tensor(x_train, dtype=torch.long), )
    train_loader = torch.utils.data.DataLoader(train_dataset,
                                               batch_size=batch_size,
                                               shuffle=False)

    for x in tqdm_notebook(train_loader):