Exemplo n.º 1
0
# Rebalance the training set by random oversampling, then fit the model on it.
# (RandomOverSampler is presumably imbalanced-learn's sampler -- confirm.)
sm = RandomOverSampler()
# imbalanced-learn renamed `fit_sample` to `fit_resample` (0.4) and removed the
# old name in 0.8; prefer the new API but keep working on old installs.
if hasattr(sm, 'fit_resample'):
    X_train_res, y_train_res = sm.fit_resample(X_train, y_train)
else:
    X_train_res, y_train_res = sm.fit_sample(X_train, y_train)
model.fit(X_train_res, y_train_res)
#%%
# Metrics on the held-out test split.
y_pred = model.predict(X_test)
acc_score = model.score(X_test, y_test)
print(f'The accuracy score is: {acc_score}')
rec_score = recall_score(y_test, y_pred)
print(f'The recall score is: {rec_score}')
#%%
# Confusion matrices for the test split: normalized (left) and raw counts (right).
#mm.model_report_card(model, X_train_res, y_train_res, X_test, y_test, normalize=False)
fig, ax = plt.subplots(1, 2)
fig.suptitle(f'accuracy score: {acc_score:.3f}, recall score: {rec_score:.3f}',
             fontsize=24)
mm.plot_confusion_matrix(y_test, y_pred, ax=ax[0], normalize=True)
mm.plot_confusion_matrix(y_test, y_pred, ax=ax[1], normalize=False)
#%%
# Metrics on the "last week" data set.
y_pred = model.predict(X_lastweek)
# Score against the true labels; scoring against `y_pred` compares the model
# with its own output and therefore always reports a perfect score.
acc_score = model.score(X_lastweek, y_lastweek)
print(f'The accuracy score is: {acc_score}')
rec_score = recall_score(y_lastweek, y_pred)
print(f'The recall score is: {rec_score}')
#%%
# Confusion matrices for the last-week set: normalized (left) and raw counts (right).
#mm.model_report_card(model, X_train_res, y_train_res, X_test, y_test, normalize=False)
fig, ax = plt.subplots(1, 2)
fig.suptitle(f'accuracy score: {acc_score:.3f}, recall score: {rec_score:.3f}',
             fontsize=24)
mm.plot_confusion_matrix(y_lastweek, y_pred, ax=ax[0], normalize=True)
mm.plot_confusion_matrix(y_lastweek, y_pred, ax=ax[1], normalize=False)
Exemplo n.º 2
0
# Train the model incrementally, one `chunksize`-row slice of the training
# data per iteration: encode the slice, resample it, then partial_fit.
# NOTE(review): n starts at 1, so rows [0:chunksize] are never visited by this
# loop -- confirm the first chunk is fitted elsewhere or is skipped on purpose.
# NOTE(review): if `sm` is an imbalanced-learn sampler, `fit_sample` was removed
# in 0.8 in favour of `fit_resample` -- confirm the installed version.
# The full label set does not change between iterations, so compute it once.
all_classes = np.unique(y)
for n in tqdm(np.arange(1, 127)):
    X_train_chunky = X_train[n * chunksize:(n + 1) * chunksize]
    y_train_chunky = y_train[n * chunksize:(n + 1) * chunksize]
    # The encoder is re-fitted on every chunk using that chunk's labels.
    X_train_chunky = loo.fit_transform(X_train_chunky, y_train_chunky)
    X_train_res, y_train_res = sm.fit_sample(X_train_chunky, y_train_chunky)
    model.partial_fit(X_train_res, y_train_res, classes=all_classes)
dump(model, 'SGD_model_minibatch_200k_127.joblib')
#%%
# Display metrics for model performance on the test split.
y_pred = model.predict(X_test)
acc_score = model.score(X_test, y_test)
print(f'The accuracy score is: {acc_score}')
# Keep recall in its own variable and label it correctly; it was previously
# stored in `acc_score` and printed as an accuracy score.
rec_score = recall_score(y_test, y_pred)
print(f'The recall score is: {rec_score}')
#%%
# Confusion matrices in separate figures: normalized first, then raw counts.
#mm.model_report_card(model, X_train_res, y_train_res, X_test, y_test, normalize=False)
fig, ax = plt.subplots()
mm.plot_confusion_matrix(y_test, y_pred, ax=ax, normalize=True)
fig, ax = plt.subplots()
mm.plot_confusion_matrix(y_test, y_pred, ax=ax, normalize=False)

#%%
#pca = PCA(n_components = 0.9)
#X_pca = pca.fit_transform(X_train)
#print(X_pca.shape)
##fig, ax = plt.subplots()
##ax.scatter(X_pca[:,0],X_pca[:,1], c=y_train_res)
#%%
#fn = 'df_full.gzip'
#lf.temp_save(fname=os.path.join(data_directory,fn), df=df)
Exemplo n.º 3
0
# prob_y_train = logit_result.predict()
# Test design matrix: the columns named by params.index[1:] plus a constant
# column (presumably the first index entry is the intercept -- confirm).
X_test_metric = sm.add_constant(X_test[params.index[1:]])
# Predicted probabilities for the test set.
prob_y_test = logit_result.predict(X_test_metric)
# Turn probabilities into 0/1 labels with a 0.5 cutoff.
# `pd.np` was deprecated in pandas 1.0 and removed in 2.0, so threshold via
# pandas itself; `.values` returns an ndarray, as `np.where` did.
label_pred_test = pd.Series(prob_y_test > 0.5).astype(int).values

# ROC curve
plot_roc_curve(prob_y_test, y_test)
# KS table & KS curve
ks_stattable, _ = ks_stats(prob_y_test, y_test)
# Lift chart & Lorenz curve
# lift_lorenz(prob_y_test, y_test)
# Build the confusion matrix
plot_confusion_matrix(y_test, label_pred_test, labels=[0, 1])
# Generate the scorecard
method = 4
var_list = list(params.index)
# '参数估计' is the column name meaning "parameter estimate".
est = params['参数估计']

# Generate the scorecard
# method = 4
# # Remove "_BIN" and "_WOE" from the parameter index names
# params.index = [k.replace("_BIN", "") for k in params.index]
# params.index = [k.replace("_WOE", "") for k in params.index]
# paramsEst = params['参数估计']
# var_list = list(paramsEst.index)[1:]
#