# Look at XGBoost feature importance
# (sk_xgb is assumed to be an alias for xgboost's sklearn estimator,
#  e.g. `from xgboost import XGBRegressor as sk_xgb`, set up earlier)
bst = sk_xgb(max_depth=6,
             learning_rate=0.5,
             n_estimators=50,
             objective='reg:squarederror',  # 'reg:linear' is the deprecated spelling
             booster='gbtree',
             n_jobs=6,
             random_state=RANDOM_SEED,
             eval_metric='auc',
             verbosity=1)  # the old 'silent' flag was removed; verbosity replaces it
bst.fit(X, y)
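
# Quick sanity check of the fit (a sketch; assumes y is the binary target,
# consistent with the AUC eval metric chosen above):
from sklearn.metrics import roc_auc_score
print('train AUC:', roc_auc_score(y, bst.predict(X)))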

# Plot the importance chart; the tall figure leaves room for one bar per feature.
# plot_importance is a module-level function (assumes `import xgboost as xgb`),
# not an attribute of the sklearn estimator class.
fig, ax = plt.subplots(figsize=(10, 80))
xgb.plot_importance(bst, ax=ax)
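
# Note: plot_importance defaults to importance_type='weight' (split counts),
# while feature_importances_ used below is gain-based for tree boosters, so
# the two rankings can differ. The raw gain scores are available directly:
gain_scores = bst.get_booster().get_score(importance_type='gain')
top10 = sorted(gain_scores.items(), key=lambda kv: kv[1], reverse=True)[:10]
for name, score in top10:
    print(f'{name}: {score:.2f}')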

# TODO: wrap this in a function
# Build a Feature/Importance table. Constructing the DataFrame from a dict
# keeps Importance numeric; stacking names and scores with np.vstack coerces
# everything to strings, which would make the sort below lexicographic.
features = t_data.columns
select_features = pd.DataFrame({'Feature': features,
                                'Importance': bst.feature_importances_})
select_features = select_features.sort_values(by='Importance', ascending=False)
print(select_features)
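
# A sketch of the helper the TODO above asks for (importance_table is a
# hypothetical name; it relies only on feature_importances_):
def importance_table(model, feature_names):
    """Return features sorted by descending importance as a DataFrame."""
    return (pd.DataFrame({'Feature': feature_names,
                          'Importance': model.feature_importances_})
            .sort_values(by='Importance', ascending=False)
            .reset_index(drop=True))

# Equivalent to the block above:
# select_features = importance_table(bst, t_data.columns)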


# TODO: wrap this in a function
# Make a new, smaller dataset: keep only the features whose importance
# exceeds CUTOFF, plus the target and ID columns needed downstream.
selected_features = select_features.loc[select_features['Importance'] > CUTOFF]
select_list = selected_features['Feature']
# Series.append was removed in pandas 2.0; pd.concat is the replacement.
select_train_list = pd.concat([select_list, pd.Series(['target', 'ID_code'])],
                              ignore_index=True)
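
# Typical use of the lists built above (a sketch; train_df is a hypothetical
# name for the raw frame that still carries 'target' and 'ID_code'):
# reduced_train = train_df[select_train_list]   # features + target + id
# reduced_X     = t_data[select_list]           # features only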