Exemplo n.º 1
0
# test['logvar38'] = test['var38'].map(np.log1p)
# # Encode var36 as category
# test['var36'] = test['var36'].astype('category')
# test = pd.get_dummies(test)
# Build the two PCA columns for the test set.
# transform() is used instead of fit_transform(): re-fitting on the test rows
# would yield components that differ from the ones behind the PCA1/PCA2
# columns the classifier saw during training.
# NOTE(review): assumes `pca` was already fitted on the (normalized) training
# data with the same columns earlier in the script -- confirm.
test_normalized = normalize(test, axis=0)
test_pca = pca.transform(test_normalized)
test['PCA1'] = test_pca[:, 0]
test['PCA2'] = test_pca[:, 1]

# Restrict to the model's input columns, then apply the feature selector.
sel_test = fs.transform(test[features])

# Score with the classifier, capped at its best iteration.
probs = clf.predict_proba(sel_test, ntree_limit=clf.best_iteration)

# Column 1 of predict_proba is written out as the TARGET score per test index.
submission = pd.DataFrame({"ID": test.index, "TARGET": probs[:, 1]})
submission.to_csv("submission.csv", index=False)


# plot feature importance
# The parenthesized single-argument form prints the same text on Python 2 and 3.
print('plot feature importance ...')
# Identity mapping over the feature names; only the disabled index re-mapping
# below would consume it.
#mapFeat = dict(zip(["f"+str(i) for i in range(len(features))],features))
mapFeat = dict(zip(features, features))

# NOTE(review): newer xgboost releases expose the booster via get_booster() --
# confirm against the installed version.
ts = pd.Series(clf.booster().get_fscore())
#ts.index = ts.reset_index()['index'].map(mapFeat)

# Sort once and keep the 15 highest scores; ascending order puts the largest
# bar at the top of the horizontal chart.
top_scores = ts.sort_values()[-15:]

# Draw a single chart. A second Series.plot call would land on the same axes,
# and x/y column selectors do not apply to a Series.
featp = top_scores.plot(kind='barh', legend=False, figsize=(6, 10))
plt.title('XGBoost Feature Importance')
fig_featp = featp.get_figure()
fig_featp.savefig('feature_importance_xgb.png', bbox_inches='tight', pad_inches=1)

Exemplo n.º 2
0
# Add the PCA1/PCA2 columns to the test set.
# The already-held projection is applied with transform(); calling
# fit_transform() here would re-learn the components from the test rows and
# make these columns inconsistent with the ones used for training.
# NOTE(review): assumes `pca` was fitted on the normalized training data
# (same columns) before this point -- confirm.
test_normalized = normalize(test, axis=0)
test_pca = pca.transform(test_normalized)
test['PCA1'] = test_pca[:, 0]
test['PCA2'] = test_pca[:, 1]

# Select the model's input columns and run them through the feature selector.
sel_test = fs.transform(test[features])

# Predict with the tree count limited to the classifier's best iteration.
probs = clf.predict_proba(sel_test, ntree_limit=clf.best_iteration)

# The second predict_proba column becomes TARGET, keyed by the test index.
submission = pd.DataFrame({"ID": test.index, "TARGET": probs[:, 1]})
submission.to_csv("submission.csv", index=False)

# plot feature importance
# print(...) with one argument behaves the same on Python 2 and Python 3.
print('plot feature importance ...')
# Identity mapping of feature names; it is only referenced by the disabled
# index re-mapping line further down.
#mapFeat = dict(zip(["f"+str(i) for i in range(len(features))],features))
mapFeat = dict(zip(features, features))

# NOTE(review): recent xgboost versions name this accessor get_booster() --
# confirm against the installed version.
ts = pd.Series(clf.booster().get_fscore())
#ts.index = ts.reset_index()['index'].map(mapFeat)

# Compute the top-15 slice once instead of re-sorting for every plot call.
top_scores = ts.sort_values()[-15:]

# One horizontal bar chart is enough: repeated Series.plot calls reuse the
# current axes, and x/y selectors have no effect on a Series.
featp = top_scores.plot(kind='barh',
                        legend=False,
                        figsize=(6, 10))
plt.title('XGBoost Feature Importance')
fig_featp = featp.get_figure()
fig_featp.savefig('feature_importance_xgb.png',
                  bbox_inches='tight',
                  pad_inches=1)