示例#1
0
def Titanic():
    """Train and visualise a decision tree on the Titanic training set.

    Reads ``datas/train.csv``, uses ``Survived`` as the target and prepares
    the remaining columns: drops ``Name``, ``PassengerId``, ``Ticket`` and
    ``Cabin``; fills gaps in ``Age`` (median) and ``Embarked`` (``'S'``);
    one-hot encodes ``Embarked``; integer-encodes ``Sex``. The last 200 rows
    are held out as a test set and a ``DecisionTreeClassifier`` is fitted on
    the rest. The fitted tree is drawn with ``plot_tree`` and exported to
    ``datas/pic.dot``.

    Several statements below (``head``, ``describe``, ``score``, ...) are
    bare expressions whose results are discarded; they only show something
    when the lines are executed one by one in an interactive session.

    Returns:
        None.
    """
    X = pd.read_csv('datas/train.csv')
    # Select the dependent (target) variable.
    y = X['Survived']
    # Look at how survival is distributed by sex.
    X[["Sex", "Survived"]].groupby(['Sex'], as_index=False).mean().sort_values(by='Survived', ascending=False)
    # Remove the target and the uninformative features from the inputs.
    X.drop(['Survived', 'Name', 'PassengerId', 'Ticket'], axis=1, inplace=True)
    X.head()
    X.info()
    # The Cabin column has many missing values, so drop it as well.
    X.drop(['Cabin'], axis=1, inplace=True)
    X['Embarked'].describe()
    # Fill in the missing values. The filled column is assigned back rather
    # than using ``X[col].fillna(..., inplace=True)``: that chained in-place
    # form works on an intermediate object and, with pandas Copy-on-Write,
    # leaves ``X`` unchanged.
    X['Age'] = X['Age'].fillna(X['Age'].median())
    X['Embarked'] = X['Embarked'].fillna('S')
    X.info()
    # Encode the Embarked column with dummy (one-hot) coding.
    X = pd.concat([X, pd.get_dummies(X['Embarked'], prefix="Embarked")], axis=1)
    # Drop the original Embarked column.
    X.drop(['Embarked'], axis=1, inplace=True)
    X['Sex'] = pd.factorize(X['Sex'])[0]
    X.info()
    # Split the sample into training and test parts (last 200 rows = test).
    X_train = X[:-200]
    X_test = X[-200:]
    y_train = y[:-200]
    y_test = y[-200:]

    clf = tree.DecisionTreeClassifier(max_depth=5, random_state=21)
    clf.fit(X_train, y_train)
    clf.score(X_train, y_train)
    clf.score(X_test, y_test)

    eli5.explain_weights_sklearn(clf, feature_names=X_train.columns.values)

    plot_tree(clf, filled=True)
    plt.show()

    export_graphviz(clf, out_file='datas/pic.dot')
示例#2
0
def test_explain_linear_multilabel(clf):
    """Check the weight explanation of an estimator fitted on multilabel data.

    ``clf`` is supplied by the caller (presumably a pytest fixture or
    parametrisation defined elsewhere -- confirm). It is fitted on the output
    of ``make_multilabel_classification`` and explained with
    ``explain_weights_sklearn``; each of the two renderings returned by
    ``format_as_all`` must contain ``'y=4'``, ``'x0'`` and ``'BIAS'``.
    """
    features, targets = make_multilabel_classification(random_state=42)
    clf.fit(features, targets)
    explanation = explain_weights_sklearn(clf)
    text_output, html_output = format_as_all(explanation, clf)
    expected_fragments = ('y=4', 'x0', 'BIAS')
    for rendered in (text_output, html_output):
        for fragment in expected_fragments:
            assert fragment in rendered
示例#3
0
def test_format_html_options(force_weights, horizontal_layout):
    """Smoke-test ``format_as_html`` with the given option values.

    Fits a ``LinearRegression`` on a generated 3-target regression problem,
    then renders both the weight explanation and the explanation of the
    prediction for the first sample, passing ``force_weights`` and
    ``horizontal_layout`` through to ``format_as_html``. Each rendering is
    handed to ``write_html`` with a postfix derived from the option values.
    Nothing is asserted: the test passes when no call raises.
    """
    # Covers options that are not tested elsewhere.
    features, targets = make_regression(
        n_samples=100, n_targets=3, n_features=10, random_state=42)
    model = LinearRegression()
    model.fit(features, targets)
    weights = explain_weights_sklearn(model)

    html_options = {
        'force_weights': force_weights,
        'horizontal_layout': horizontal_layout,
    }
    # One "name-value" tag per option, in sorted option-name order.
    option_tags = [
        '{}-{}'.format(name, value)
        for name, value in sorted(html_options.items())
    ]
    suffix = '_' + '_'.join(option_tags)
    print(html_options, suffix)

    # The formatters only need to run without crashing.
    weights_html = format_as_html(weights, **html_options)
    weights_text = format_as_text(weights)
    write_html(model, weights_html, weights_text, postfix=suffix)

    prediction = explain_prediction_sklearn(model, features[0])
    prediction_html = format_as_html(prediction, **html_options)
    prediction_text = format_as_text(prediction)
    write_html(model, prediction_html, prediction_text,
               postfix='_expl' + suffix)