def test_feature_importances_w_subestimators():
    """An ensemble exposing ``estimators_`` is accepted by the plot."""
    per_tree_importances = [
        [0.12, 0.10, 0.8, 0.06, 0.03],
        [0.10, 0.10, 0.8, 0.06, 0.01],
        [0.09, 0.01, 0.9, 0.12, 0.02],
        [0.12, 0.10, 0.8, 0.06, 0.01],
    ]
    trees = []
    for values in per_tree_importances:
        tree = Mock()
        tree.feature_importances_ = np.array(values)
        trees.append(tree)
    rf = Mock()
    rf.estimators_ = trees
    rf.feature_importances_ = np.array([0.12, 0.10, 0.8, 0.06, 0.03])
    plot.feature_importances(rf)
def test_feature_importances_w_subestimators():
    rf = Mock()
    tree_1 = Mock()
    tree_1.feature_importances_ = np.array([0.12, 0.10, 0.8, 0.06, 0.03])
    tree_2 = Mock()
    tree_2.feature_importances_ = np.array([0.10, 0.10, 0.8, 0.06, 0.01])
    tree_3 = Mock()
    tree_3.feature_importances_ = np.array([0.09, 0.01, 0.9, 0.12, 0.02])
    tree_4 = Mock()
    tree_4.feature_importances_ = np.array([0.12, 0.10, 0.8, 0.06, 0.01])
    rf.estimators_ = [tree_1, tree_2, tree_3, tree_4]
    rf.feature_importances_ = np.array([0.12, 0.10, 0.8, 0.06, 0.03])
    plot.feature_importances(rf)
 u'seaborn-colorblind',
 u'seaborn-deep',
 u'seaborn-whitegrid',
 u'seaborn-bright',
 u'seaborn-poster',
 u'seaborn-muted',
 u'seaborn-paper',
 u'seaborn-white',
 u'seaborn-pastel',
 u'seaborn-dark',
 u'seaborn-dark-palette']
'''
# Render the classifier's feature importances twice, with two different
# matplotlib styles, saving the first figure to disk.
my_dpi = 96  # assumed screen DPI; used below to size the second figure in pixels
plt.style.use('ggplot')
plt.rcParams["figure.figsize"] = (11.69, 8.27)  # A4 landscape, in inches
plot.feature_importances(classifier, feature_names=feature_names)
# plt.xlabel('Feature Names')
plt.ylabel('Feature Importance Score (%)')
# plt.title('Features Importance')
plt.gca().xaxis.set_minor_formatter(ticker.NullFormatter())  # hide minor tick labels
plt.xticks(rotation=90)  # vertical tick labels so long feature names fit
f = plt.gcf()
f.subplots_adjust(bottom=0.4)  # leave room below for the rotated labels
plt.savefig("out-stats-graphs/RF_Feature_Importance1.pdf")

# Second rendering: 'fivethirtyeight' style, figure sized in pixels via DPI.
plt.style.use('fivethirtyeight')
plt.rcParams["figure.figsize"] = (1164 / my_dpi, 1024 / my_dpi)
plot.feature_importances(classifier, feature_names=feature_names)
plt.ylabel('Feature Importance Score (%)')
plt.xlabel('Feature Names')
plt.gca().xaxis.set_minor_formatter(ticker.NullFormatter())
def test_feature_importances_feature_names():
    """A raw importances array can be labeled via ``feature_names``."""
    names = ['thing_a', 'thing_b', 'thing_c', 'thing_d', 'thing_e']
    values = np.array([0.12, 0.10, 0.8, 0.06, 0.03])
    plot.feature_importances(values, feature_names=names)
def test_feature_importances_top3():
    """Only the three largest importances should be plotted."""
    values = np.array([0.12, 0.10, 0.8, 0.06, 0.03])
    plot.feature_importances(values, top_n=3)
def test_feature_importances_from_array():
    """A plain numpy array is accepted in place of a fitted model."""
    raw = np.array([0.12, 0.10, 0.8, 0.06, 0.03])
    plot.feature_importances(raw)
def test_feature_importances():
    """An object exposing ``feature_importances_`` can be plotted directly."""
    estimator = Mock()
    estimator.feature_importances_ = np.array([0.12, 0.10, 0.8, 0.06, 0.03])
    plot.feature_importances(estimator)
# Example no. 8 (scraping artifact; original read "Exemplo n.º 8")
# 0
 def test_feature_importances(self):
     """Passing None must raise ValueError mentioning what is needed to plot.

     ``assertRaisesRegexp`` was a deprecated alias and was removed in
     Python 3.12; ``assertRaisesRegex`` (available since 3.2) is the
     supported spelling.
     """
     with self.assertRaisesRegex(ValueError, "needed to plot"):
         plot.feature_importances(None)
def test_feature_importances_feature_names():
    """Custom labels are passed through via the ``feature_names`` keyword."""
    labels = ['thing_a', 'thing_b', 'thing_c', 'thing_d', 'thing_e']
    importances = np.array([0.12, 0.10, 0.8, 0.06, 0.03])
    plot.feature_importances(importances, feature_names=labels)
def test_feature_importances_top3():
    """The ``top_n`` argument limits the plot to the top three features."""
    importances = np.array([0.12, 0.10, 0.8, 0.06, 0.03])
    plot.feature_importances(importances, top_n=3)
def test_feature_importances_from_array():
    """An importances array works without any estimator object."""
    arr = np.array([0.12, 0.10, 0.8, 0.06, 0.03])
    plot.feature_importances(arr)
def test_feature_importances():
    """A mocked model with ``feature_importances_`` set is plottable."""
    fake_model = Mock()
    fake_model.feature_importances_ = np.array([0.12, 0.10, 0.8, 0.06, 0.03])
    plot.feature_importances(fake_model)
"""
Feature importances plot
"""
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split

from sklearn_evaluation import plot

# Build a synthetic binary classification problem (200 samples, 20 features,
# 5 informative) and hold out 30% of the rows for testing.
X, y = datasets.make_classification(200, 20, n_informative=5, class_sep=0.65)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

# Single-tree forest keeps the example fast; importances still populate.
model = RandomForestClassifier(n_estimators=1)
model.fit(X_train, y_train)

# plot all features
ax = plot.feature_importances(model)
plt.show()

# only top 5
plot.feature_importances(model, top_n=5)
plt.show()
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.ensemble import RandomForestClassifier
# sklearn.cross_validation was removed in scikit-learn 0.20; the helper now
# lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split

from sklearn_evaluation import plot

# Synthetic binary problem: 200 samples, 10 features, 5 informative.
# n_informative is keyword-only in modern scikit-learn, so pass it by name.
data = datasets.make_classification(200, 10, n_informative=5, class_sep=0.65)
X = data[0]
y = data[1]

# Hold out 30% of the rows for testing.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3)

est = RandomForestClassifier()
est.fit(X_train, y_train)

# Show only the five most important features.
plot.feature_importances(est, top_n=5)
plt.show()