def Titanic():
    """Train and visualise a decision tree on the Titanic training data.

    Reads ``datas/train.csv``, drops the columns that are not used as
    features, fills missing ``Age`` / ``Embarked`` values, encodes the
    categorical columns, fits a depth-limited ``DecisionTreeClassifier`` on
    every row except the last 200, prints the accuracy on both splits, plots
    the fitted tree and exports it to ``datas/pic.dot``.

    The function takes no arguments and returns ``None``; its results are
    the printed diagnostics, the displayed plot and the written ``.dot``
    file.
    """
    X = pd.read_csv('datas/train.csv')

    # Dependent (target) variable.
    y = X['Survived']

    # Remove the target and the columns treated as uninformative.
    X = X.drop(['Survived', 'Name', 'PassengerId', 'Ticket'], axis=1)
    X.info()

    # The Cabin column has many missing values, so drop it as well.
    X = X.drop(['Cabin'], axis=1)

    # Fill the remaining gaps. Assign the result back instead of calling
    # ``fillna(..., inplace=True)`` on a column selection: that chained
    # in-place form does not update ``X`` under pandas Copy-on-Write.
    X['Age'] = X['Age'].fillna(X['Age'].median())
    X['Embarked'] = X['Embarked'].fillna('S')
    X.info()

    # Dummy-encode Embarked, then remove the original column.
    embarked_dummies = pd.get_dummies(X['Embarked'], prefix="Embarked")
    X = pd.concat([X, embarked_dummies], axis=1)
    X = X.drop(['Embarked'], axis=1)

    # Replace the Sex labels with integer codes.
    X['Sex'] = pd.factorize(X['Sex'])[0]
    X.info()

    # Hold out the last rows as the test split; everything before is training.
    n_test = 200
    X_train, X_test = X[:-n_test], X[-n_test:]
    y_train, y_test = y[:-n_test], y[-n_test:]

    clf = tree.DecisionTreeClassifier(max_depth=5, random_state=21)
    clf.fit(X_train, y_train)

    # Report the scores instead of computing and discarding them.
    print('train accuracy:', clf.score(X_train, y_train))
    print('test accuracy:', clf.score(X_test, y_test))

    # NOTE: the returned explanation object is not used further here.
    eli5.explain_weights_sklearn(clf, feature_names=X_train.columns.values)

    plot_tree(clf, filled=True)
    plt.show()
    export_graphviz(clf, out_file='datas/pic.dot')
def test_explain_linear_multilabel(clf):
    """Check the weight explanation of a classifier fit on multilabel data.

    ``clf`` is fit on a generated multilabel dataset, explained with
    ``explain_weights_sklearn`` and rendered with ``format_as_all``. Both
    renderings must mention the target ``y=4``, the feature ``x0`` and the
    ``BIAS`` term.
    """
    features, targets = make_multilabel_classification(random_state=42)
    clf.fit(features, targets)

    explanation = explain_weights_sklearn(clf)
    text_output, html_output = format_as_all(explanation, clf)

    expected_fragments = ('y=4', 'x0', 'BIAS')
    for rendered in (text_output, html_output):
        for fragment in expected_fragments:
            assert fragment in rendered
def test_format_html_options(force_weights, horizontal_layout):
    """Smoke-test ``format_as_html`` options that are not tested elsewhere.

    Fits a ``LinearRegression`` on a generated 3-target regression problem,
    then renders both the weight explanation and a single-prediction
    explanation to HTML with the given ``force_weights`` and
    ``horizontal_layout`` values. The test only checks that rendering does
    not crash; the output is handed to ``write_html`` under a postfix
    derived from the option values.
    """
    data, targets = make_regression(
        n_samples=100, n_targets=3, n_features=10, random_state=42)
    regressor = LinearRegression()
    regressor.fit(data, targets)
    weights_explanation = explain_weights_sklearn(regressor)

    options = {
        'force_weights': force_weights,
        'horizontal_layout': horizontal_layout,
    }
    # One "name-value" piece per option, ordered by option name.
    option_parts = [
        '{}-{}'.format(name, options[name]) for name in sorted(options)
    ]
    suffix = '_' + '_'.join(option_parts)
    print(options, suffix)

    # Weights explanation: just check that it does not crash.
    weights_html = format_as_html(weights_explanation, **options)
    weights_text = format_as_text(weights_explanation)
    write_html(regressor, weights_html, weights_text, postfix=suffix)

    # Same check for the explanation of a single prediction.
    prediction_explanation = explain_prediction_sklearn(regressor, data[0])
    prediction_html = format_as_html(prediction_explanation, **options)
    prediction_text = format_as_text(prediction_explanation)
    write_html(regressor, prediction_html, prediction_text,
               postfix='_expl' + suffix)