def test(full_dataset, state_name):
    """Evaluate the saved model for ``state_name`` on the test split.

    Loads ``mymodel_{state_name}.pth`` onto the CPU, predicts on
    ``full_dataset.test[full_dataset.col]``, prints the test loss and writes
    ``result_{state_name}.csv`` with the columns ``id``, ``visitors_pred``
    and ``visitors_actual``.

    Args:
        full_dataset: Object exposing ``test`` (a DataFrame holding an ``id``
            column plus the feature columns), ``col`` (the feature column
            selector) and ``test_target`` (the ground-truth values).
        state_name: Suffix used in both the checkpoint and the result
            file names.
    """
    # Load the model and restore the weights saved during training.
    model = MyModel()
    model.load_state_dict(
        torch.load(f'mymodel_{state_name}.pth', map_location='cpu'))
    model.eval()
    criterion = RMSLELoss()

    # Convert the feature columns of the dataframe to a float tensor.
    inputs = full_dataset.test[full_dataset.col].astype(float).values
    inputs = torch.tensor(inputs).float()

    # Ground-truth labels.
    labels = full_dataset.test_target.values

    # Inference only: disable autograd so no graph is recorded for the
    # forward pass or for the loss.
    with torch.no_grad():
        outputs = model(inputs)
        loss = criterion(outputs, torch.from_numpy(labels))

    # NOTE(review): whether RMSLELoss already averages / takes a root is not
    # visible from here -- confirm this extra normalisation is intended.
    test_loss = sqrt(loss / len(full_dataset.test))
    print(f'Testing Loss: {test_loss:.6f}')

    # Save the result next to the ids.
    result = full_dataset.test["id"].to_frame()
    result.insert(1, "visitors_pred", outputs.detach().numpy())
    result.insert(2, "visitors_actual", labels)
    result.to_csv(f'result_{state_name}.csv', index=False)
def test():
    """Run the saved model on ``test.csv`` and write predictions to ``result.csv``.

    The weights are read from ``mymodel.pth`` (mapped to the CPU). Missing
    values in the test data are replaced by 0 before prediction. The output
    file has one row per input row and one column per entry of ``label_col``.
    """
    # Load the model and restore the weights saved during training.
    model = MyModel()
    model.load_state_dict(torch.load('mymodel.pth', map_location='cpu'))
    model.eval()

    # Load the testing data.
    data = pd.read_csv('test.csv', encoding='utf-8')
    label_col = [
        'Input_A6_024', 'Input_A3_016', 'Input_C_013', 'Input_A2_016',
        'Input_A3_017', 'Input_C_050', 'Input_A6_001', 'Input_C_096',
        'Input_A3_018', 'Input_A6_019', 'Input_A1_020', 'Input_A6_011',
        'Input_A3_015', 'Input_C_046', 'Input_C_049', 'Input_A2_024',
        'Input_C_058', 'Input_C_057', 'Input_A3_013', 'Input_A2_017',
    ]

    # ================================================================ #
    # Any preprocessing applied to the training data must be applied to
    # the testing data as well, inside this block.
    data = data.fillna(0)
    # ================================================================ #

    # Convert the dataframe to a tensor.
    inputs = torch.tensor(data.values)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        outputs = model(inputs.float())

    # Build the result frame in one step instead of concatenating one
    # single-row frame per prediction (which copies the frame every time).
    result = pd.DataFrame(outputs.detach().numpy(), columns=label_col)
    result.to_csv('result.csv', index=False)
def predict(dim, names, weight, batch_size, pretrain_model_path, model_types=None):
    """Ensemble several saved models over the test set and write ``submission.csv``.

    For every entry of ``names`` the checkpoint ``output/<name>.pkl`` is
    loaded into the model class selected by ``model_types[i]`` and run over
    the test file. Each model's outputs are scaled by
    ``weight[i] / len(names)`` and summed; the arg-max over the last axis of
    that sum is written as ``is_duplicate`` next to ``test_id``.

    Args:
        dim: Per-model value forwarded as ``dim=`` to the model constructor.
        names: Checkpoint base names (without the ``.pkl`` suffix).
        weight: Per-model fusion weights, indexed like ``names``.
        batch_size: Batch size handed to ``get_dataloader``.
        pretrain_model_path: Per-model path forwarded to both ``MyDataset``
            and the model constructor.
        model_types: Per-model type, one of ``'mlp'``, ``'cnn'`` or
            ``'rcnn'``. Required whenever ``names`` is non-empty.

    Raises:
        TypeError: If ``model_types`` is None although models were requested.
        ValueError: If an entry of ``model_types`` is not a known type.
    """
    if model_types is None and len(names) > 0:
        raise TypeError(
            "model_types must be a sequence with one entry per name, got None")

    print('-' * 100)
    print('multi-models begin predicting ...')
    print('-' * 100)

    # Read the test data.
    test_file = '/kaggle/input/quora-question-pairs/test.csv.zip'
    test_df = pd.read_csv(test_file)
    test_ids = test_df['test_id'].values.tolist()

    # Running weighted sum of the two-column model outputs.
    result_prob_tmp = torch.zeros((len(test_ids), 2))

    for i, name in enumerate(names):
        weight_ = weight[i]
        model_type = model_types[i]

        # Pick the architecture first so an unsupported type fails with a
        # clear message instead of an unbound ``model`` further down.
        if model_type == 'mlp':
            model = MyModel(dim=dim[i], pretrain_model_path=pretrain_model_path[i])
        elif model_type == 'cnn':
            model = MyTextCNNModel(dim=dim[i], pretrain_model_path=pretrain_model_path[i])
        elif model_type == 'rcnn':
            model = MyRCNNModel(dim=dim[i], pretrain_model_path=pretrain_model_path[i])
        else:
            raise ValueError(
                "unknown model type %r for model %r; expected 'mlp', 'cnn' or 'rcnn'"
                % (model_type, name))

        # The data pipeline is the same for every architecture.
        test_iter = MyDataset(file=test_file, is_train=False,
                              pretrain_model_path=pretrain_model_path[i])
        test_iter = get_dataloader(test_iter, batch_size,
                                   shuffle=False, drop_last=False)

        # Map the checkpoint onto the configured device so it also loads on
        # machines that differ from the one it was saved on.
        # NOTE: torch.load unpickles the file -- only load trusted checkpoints.
        output_model_file = os.path.join('output', name + '.pkl')
        state = torch.load(output_model_file, map_location=device)

        model.to(device)
        model.load_state_dict(state['model_state'])
        model.eval()
        print('-' * 20, 'model', i, '-' * 20)
        print('load model:%s, loss:%.4f, e:%d, lr:%.7f, time:%d' %
              (name, state['loss'], state['e'], state['lr'], state['time']))

        # Predict. Batch outputs are collected in a list and concatenated
        # once, rather than re-concatenating the growing tensor per batch.
        batch_outputs = []
        with torch.no_grad():
            for batch in tqdm(test_iter):
                # Move inputs to the same device as the model.
                batch = [b.to(device) for b in batch]
                out = model(batch, task='eval')
                batch_outputs.append(out.cpu())  # gpu -> cpu
        tmp = torch.cat(batch_outputs, dim=0)

        # Prediction with the current model is finished.
        print('model', i, 'predict finished!\n')

        # Weighted fusion of this model's outputs into the running sum.
        result_prob_tmp += (weight_ / len(names)) * tmp

        # Drop the model before the next one is built.
        del model
        gc.collect()
        time.sleep(1)

    # Fusion strategy: weighted average of the outputs, then arg-max.
    _, result = torch.max(result_prob_tmp, dim=-1)
    result = result.numpy()

    # Save the result.
    df = pd.DataFrame()
    df['test_id'] = test_ids
    df['is_duplicate'] = result
    df.to_csv("submission.csv", encoding='utf-8', index=False)