## Tokenize and padding BERT_MODEL_PATH = '../input/bert-pretrained-models/uncased_l-12_h-768_a-12/uncased_L-12_H-768_A-12/' tokenizer = BertTokenizer.from_pretrained(BERT_MODEL_PATH) x_test = convert_lines(x_test,MAX_LEN,tokenizer) x_test_cuda = torch.tensor(x_test, dtype=torch.long).cuda() test_data = torch.utils.data.TensorDataset(x_test_cuda) test_loader = torch.utils.data.DataLoader(test_data, batch_size=BATCH_SIZE, shuffle=False) ## load fine-tuned model bert_config = BertConfig('../input/bert-pretrained-models/uncased_l-12_h-768_a-12/uncased_L-12_H-768_A-12/bert_config.json') net = BertForSequenceClassification(bert_config,num_labels=6) net.load_state_dict(torch.load("../input/bert-model3/bert_pytorch_v3.pt")) net.cuda() ## inference net.eval() result_1 = list() with torch.no_grad(): for (x_batch,) in test_loader: y_pred = net(x_batch) y_pred = torch.sigmoid(y_pred.cpu()).numpy()[:,0] result_1.extend(y_pred) result_1 = np.array(result_1) net = BertForSequenceClassification(bert_config,num_labels=6) net.load_state_dict(torch.load("../input/bert-model4/bert_pytorch_v4.pt"))
validate = True
# Iterate the selected fold(s) only; other folds are presumably handled in
# separate runs of this kernel -- TODO confirm.
for fold in [ 1, ]:
    print('Fold{}:'.format(fold))
    # kfold is defined elsewhere; its [fold] entry looks like a
    # (train_indices, validation_indices) pair -- verify against where it is built.
    validate_idx = kfold[fold][1]
    train_idx = kfold[fold][0]
    # train_idx = list(range(nrows))[:int(nrows*0.8)]
    # validate_idx = list(range(nrows))[int(nrows*0.8):]

    # Load this fold's fine-tuned weights and put the model in inference mode.
    model.load_state_dict( torch.load(os.path.join(models_path, 'bert_fold{}.bin'.format(fold))))
    model.cuda()
    model.eval()
    # Freeze all parameters: this pass only extracts predictions/features,
    # no gradients are needed.
    for param in model.parameters():
        param.requires_grad = False

    # Per-fold accumulators for predictions and extracted features.
    train_pred_fold = []
    test_pred_fold = []
    train_feature_fold = []
    test_feature_fold = []

    # on train_set
    # shuffle=False keeps outputs aligned with x_train's row order.
    train_dataset = TensorDataset(torch.tensor(x_train, dtype=torch.long), )
    train_loader = torch.utils.data.DataLoader(train_dataset, batch_size=batch_size, shuffle=False)
    # NOTE(review): the body of this loop continues beyond this chunk of the file.
    for x in tqdm_notebook(train_loader):