'seed': 1000, # 随机数种子,可以复现随机数据的结果,,也可以用于调整参数 } watch_list = [(dtrain, 'train'), (dvalidation, 'validation')] # res 为交叉验证之后的各个树的结果,res.shape[0]即为最优数量 res = xgb.cv(params, dtrain, num_boost_round=500, early_stopping_rounds=100) # print('最优次数 ', res.shape[0]) model = xgb.train(params, dtrain, num_boost_round=res.shape[0], evals=watch_list, verbose_eval=True, early_stopping_rounds=500) # plt.ion() # xgb.plot_tree(model,num_trees=0) # plt.show() pred_y = model.predict(dtrain) pred_y = np.array([round(pred) for pred in pred_y]) fscore = metrics.f1_score(yTrain, pred_y) print('Fvalue:', fscore) joblib.dump(model, 'carInsurancePredXgboost.model') # 预测 model = joblib.load('carInsurancePredXgboost.model') dataTest = pd.read_csv(r'C:\Users\ZY\Desktop\ML\VI_test.csv') xTest = preProcTest(dataTest, scaler, toNumpy=True) dtest = xgb.DMatrix(xTest) pred_y = model.predict(dtest) pred_y = np.array([round(pred) for pred in pred_y]) print(pred_y) countEqual1 = sum(pred_y) print('预测结果为1的数量:', countEqual1) toJson(pred_y)
# NOTE(review): the statements up to `net.train()` look like the tail of a
# plotting/training loop whose header is not visible here -- confirm their
# indentation against the enclosing loop before merging.
plt.title(u'训练过程')
# Disabled alternative: plot the loss curve (plt.plot(epochList, lossList))
# with axis labels for iteration count and error, plus a matching title.
plt.pause(0.2)
net.train()  # switch back to training mode after the plotting step

plt.ioff()
plt.show()

# Save the model twice: the whole pickled module and the state dict only.
torch.save(net, 'carInsurancePred.pt')
torch.save(net.state_dict(), 'carInsurancePredParams.pt')

# Load and preprocess the test data with the scaler fitted on the training set.
dataTest = pd.read_csv(r'C:\Users\ZY\Desktop\ML\VI_test.csv')
xTest = preProcTest(dataTest, scaler, toTensor=True)

# To restore the model from disk instead of reusing the in-memory one:
# net = Net(14, 10, 2, 3)
# net.load_state_dict(torch.load('carInsurancePredParams.pt'))
net.eval()  # inference mode for the forward pass below

# Predict and write the results to file.
# NOTE(review): `dic` is not used by any visible code; kept in case `toJson`
# reads it as a global -- confirm and remove if not.
dic = OrderedDict()

# Inference needs no gradients; disabling autograd avoids building a graph
# for the forward pass and the softmax.
with torch.no_grad():
    output = net(xTest)
    prob = F.softmax(output, dim=1)

# Index of the most probable class per row, as a NumPy array.
pred_y = torch.max(prob, 1)[1].numpy()

equal1count = sum(pred_y == 1)
print('预测结果为1的数量:', equal1count)
toJson(pred_y)
# Training data processing: load the CSV and preprocess it into features,
# labels and the fitted scaler.
dataTrain = pd.read_csv(r'C:\Users\ZY\Desktop\ML\VI_train.csv')
xTrain, yTrain, scaler = preProc(dataTrain)

# Build the model.
max_iter = 100000000
# Class imbalance can be handled in two ways that serve the same purpose, so
# only one is used here:
#   * class_weight='balanced' on the model, which weights the classes
#     inversely to their frequency (the positive/negative samples are scaled
#     by the size of the opposite class);
#   * the sample_weight argument of fit(), an array holding one weight per
#     sample.
model = SVC(
    C=1.0,
    kernel='rbf',
    gamma='auto',
    tol=0.2,
    cache_size=1024,
    class_weight='balanced',
    max_iter=max_iter,
)
model.fit(xTrain, yTrain, sample_weight=None)

# Evaluate on the training data: accuracy first, then F1 and the number of
# samples predicted as class 1.
score = model.score(xTrain, yTrain)
print('Score:', score)

pred_y = model.predict(xTrain)
fscore = metrics.f1_score(yTrain, pred_y)
print('Fvalue:', fscore)

equal1count = sum(pred_y == 1)
print('预测结果为1的数量:', equal1count)
print(pred_y)

# Persist the fitted model, then report the iteration cap and the elapsed
# time measured from `t1`.
joblib.dump(model, 'carInsurancePredSVM.model')
print('迭代次数', max_iter, '耗时:', time.time() - t1)

# Prediction: reload the persisted model and label the test set.
model = joblib.load('carInsurancePredSVM.model')
dataTest = pd.read_csv(r'C:\Users\ZY\Desktop\ML\VI_test.csv')
xTest = preProcTest(dataTest, scaler)

pred_y = model.predict(xTest)
equal1count = sum(pred_y == 1)
print('预测结果为1的数量:', equal1count)
toJson(pred_y)