'seed': 1000,  # random number seed; makes results based on random data reproducible, and can also be used when tuning parameters
}
watch_list = [(dtrain, 'train'), (dvalidation, 'validation')]
# res holds the per-round cross-validation results; res.shape[0] is the optimal number of boosting rounds
res = xgb.cv(params, dtrain, num_boost_round=500, early_stopping_rounds=100)
# print('optimal number of rounds ', res.shape[0])
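# A minimal sketch (assumption, not part of the original script): xgb.cv returns a pandas
# DataFrame whose column names depend on the eval metric configured in params; with
# early_stopping_rounds the frame is truncated at the best boosting round.
print(res.tail())  # the last row corresponds to the chosen number of rounds, res.shape[0]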
model = xgb.train(params, dtrain, num_boost_round=res.shape[0], evals=watch_list, verbose_eval=True,
                  early_stopping_rounds=500)
# plt.ion()
# xgb.plot_tree(model,num_trees=0)
# plt.show()
pred_y = model.predict(dtrain)
pred_y = np.array([round(pred) for pred in pred_y])  # round predicted probabilities to 0/1 class labels
fscore = metrics.f1_score(yTrain, pred_y)
print('Fvalue:', fscore)
joblib.dump(model, 'carInsurancePredXgboost.model')



# Prediction
model = joblib.load('carInsurancePredXgboost.model')
dataTest = pd.read_csv(r'C:\Users\ZY\Desktop\ML\VI_test.csv')
xTest = preProcTest(dataTest, scaler, toNumpy=True)
dtest = xgb.DMatrix(xTest)
pred_y = model.predict(dtest)
pred_y = np.array([round(pred) for pred in pred_y])
print(pred_y)
countEqual1 = sum(pred_y)
print('Number of predictions equal to 1:', countEqual1)
toJson(pred_y)
    plt.title(u'Training process')
    #
    # # plt.plot(epochList, lossList)
    # # plt.xlabel(u'Iterations')
    # # plt.ylabel(u'Error')
    # # plt.title(u'Error during training')
    plt.pause(0.2)
    net.train()
plt.ioff()
plt.show()

# Save the model
torch.save(net, 'carInsurancePred.pt')  # save the whole model object
torch.save(net.state_dict(), 'carInsurancePredParams.pt')  # save only the learned parameters (state_dict)

# Process the test data
dataTest = pd.read_csv(r'C:\Users\ZY\Desktop\ML\VI_test.csv')
xTest = preProcTest(dataTest, scaler, toTensor=True)
# net = Net(14, 10, 2, 3)
# net.load_state_dict(torch.load('carInsurancePredParams.pt'))
net.eval()

# Predict and write the results to a file
dic = OrderedDict()
output = net(xTest)
prob = F.softmax(output, dim=1)
pred_y = torch.max(prob, 1)[1].numpy()
equal1count = sum(pred_y == 1)
print('Number of predictions equal to 1:', equal1count)
toJson(pred_y)
Example #3
# Process the training data
dataTrain = pd.read_csv(r'C:\Users\ZY\Desktop\ML\VI_train.csv')
xTrain, yTrain, scaler = preProc(dataTrain)

# Build the model
max_iter = 100000000
# Passing class_weight='balanced' to the model is equivalent to weighting the positive and
# negative classes by sum(negatives) and sum(positives) respectively.
# fit() also accepts a sample_weight parameter: an array (length = number of samples) that
# assigns a weight to each individual sample.
# The two mechanisms serve the same purpose; use only one of them (see the sketch after this training block).
model = SVC(C=1.0, kernel='rbf', gamma='auto', tol=0.2, cache_size=1024, class_weight='balanced', max_iter=max_iter)
model.fit(xTrain, yTrain, sample_weight=None)
score = model.score(xTrain, yTrain)
print('Score:', score)
pred_y = model.predict(xTrain)
fscore = metrics.f1_score(yTrain, pred_y)
print('Fvalue:', fscore)
equal1count = sum(pred_y == 1)
print('Number of predictions equal to 1:', equal1count)
print(pred_y)
joblib.dump(model, 'carInsurancePredSVM.model')
print('Iterations:', max_iter, 'time elapsed:', time.time() - t1)
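
# A minimal sketch (assumption, not part of the original script): building an explicit
# sample_weight array that mirrors class_weight='balanced'. sklearn's compute_sample_weight
# applies the same n_samples / (n_classes * class_count) weighting; pass it to fit() instead
# of setting class_weight on the model, never both at once.
from sklearn.utils.class_weight import compute_sample_weight

balancedWeights = compute_sample_weight(class_weight='balanced', y=yTrain)
# model.fit(xTrain, yTrain, sample_weight=balancedWeights)  # equivalent alternative to class_weight='balanced'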

# Prediction
model = joblib.load('carInsurancePredSVM.model')
dataTest = pd.read_csv(r'C:\Users\ZY\Desktop\ML\VI_test.csv')
xTest = preProcTest(dataTest, scaler)
pred_y = model.predict(xTest)
equal1count = sum(pred_y == 1)
print('Number of predictions equal to 1:', equal1count)
toJson(pred_y)