Пример #1
0
def test_run():
    """Fit each classifier and report in/out-of-sample accuracy and ROC AUC."""
    for model in (rfc, dtc):
        print(model_name(model))
        model.fit(x_train, y_train)
        in_sample = model.score(x_train, y_train)
        out_sample = model.score(x_test, y_test)
        print('score in and out of sample', in_sample, out_sample)
        # Probability of the positive class (second column) for AUC.
        positive_proba = model.predict_proba(x_test)[:, 1]
        print(roc_auc_score(y_test, positive_proba))
Пример #2
0
def test_run():
    """Fit each classifier and report accuracy plus per-class ROC AUC.

    Feeds the full predict_proba matrix to roc_auc_score with
    average=None so one AUC is returned per class rather than a
    single aggregate.
    """
    for m in [rfc, dtc, logit]:
        print(model_name(m))
        m.fit(x_train, y_train)
        print(m.classes_)
        print('score in and out of sample', m.score(x_train, y_train),
              m.score(x_test, y_test))
        y_proba = m.predict_proba(x_test)
        # Fix: `average` must be passed by keyword — it is keyword-only
        # in modern scikit-learn, so the original positional None raised
        # TypeError. average=None keeps the intended per-class output.
        print(roc_auc_score(y2_test, y_proba, average=None))
Пример #3
0
def test_run():
    """Cross-validate every classifier under each configured scorer."""
    for m in [rfc, dtc, logit]:
        print(model_name(m))
        # Same five scorers as before, printed in the same order;
        # callable scorers are rebuilt fresh for each model.
        for scorer in ("accuracy",
                       accuracy_at_proba(1 / 3),
                       coverage_at_proba(1 / 3),
                       "avg_roc_auc_macro",
                       "avg_roc_auc_micro"):
            print(cross_val_score(m, x, y, scoring=scorer, cv=cv))
Пример #4
0
def test_run():
    """Run a randomized hyperparameter search for every configured model."""
    # Each entry of `models` is (estimator, n_iter budget, param grid).
    for estimator, iterations, distributions in models:
        search = RandomizedSearchCV(estimator,
                                    param_distributions=distributions,
                                    cv=cvs.split(x),
                                    verbose=1,
                                    n_iter=iterations)
        search.fit(x, y)
        print(model_name(estimator), search.best_score_)
Пример #5
0
def get_pipeline(est, is_tree, is_regressor, params):
    """Build a preprocessing Pipeline tailored to the estimator type.

    Parameters
    ----------
    est : estimator wrapped as the final 'mo' step of the pipeline.
    is_tree : truthy when the estimator is tree-based; trees get ordinal
        encoding plus a NaN sentinel fill instead of scaling/sparse encoding.
    is_regressor : truthy flag forwarded to get_selector to pick the
        feature-scoring function.
    params : dict of search-space entries; pipeline-prefixed keys are added
        in place and the same dict is returned.

    Returns
    -------
    tuple
        (name, ppl, params): the estimator's class name, the assembled
        Pipeline, and the augmented parameter grid.
    """
    name = model_name(est)
    if name.startswith('Dummy'):
        # Baseline (Dummy*) models: just restrict input to numeric columns.
        ppl = Pipeline([
                       ('ft', FunctionTransformer()),
                       ('mo', est)
                      ])
        params['ft__func'] = [lambda x: x[numeric_cols(x)]]
        params['ft__validate'] = [False]
    elif is_tree:
        # Trees tolerate ordinal codes and a -999 NaN sentinel; no scaling.
        ppl = Pipeline([
                       ('da', DateEncoder()),
                       ('du', OrdinalEncoder()),
                       ('ft', FunctionTransformer()),
                       ('se', SelectKBest2()),
                       ('mo', est)
                      ])
        params['da__ascategory'] = [False]
        params['du__drop_invariant'] = [True]
        params['ft__func'] = [lambda x: x.fillna(-999)]
        params['ft__validate'] = [False]
        params['se__score_func'] = get_selector(is_regressor, is_tree)
        # Fix: was [0.2, 0.5, 0.8, 1000, 1000]; the duplicated 1000 doubled
        # its sampling weight under randomized search and was inconsistent
        # with the k-lists in the non-tree branch.
        params['se__k'] = [0.2, 0.5, 0.8, 1000]
    else:
        # Other (e.g. linear) models: impute + scale numeric columns,
        # sparse-encode categoricals, then union of univariate selection
        # and truncated-SVD components.
        ppl = Pipeline([
                ('da', DateEncoder()),
                ('en', FeatureUnion([
                       ('nu', Pipeline([('ft', FunctionTransformer()), ('in', Imputer()), ('sc', TransformerWrap(StandardScaler()))])),
                       ('ca', Pipeline([('ft', FunctionTransformer()), ('sc', SparseCatEncoder())]))
                       ])),
                ('fu', FeatureUnion([('se', SelectKBest2()), ('dr', TruncatedSVD2())])),
                ('mo', est)
                ])

        params['en__nu__ft__func'] = [lambda x: x[numeric_cols(x)]]
        params['en__nu__ft__validate'] = [False]
        params['en__ca__ft__func'] = [lambda x: x[object_cols(x)]]
        params['en__ca__ft__validate'] = [False]
        params['fu__se__score_func'] = get_selector(is_regressor, is_tree)
        params['fu__se__k'] = [0.2, 0.5, 0.8, 1000]
        params['fu__dr__k'] = [0.2, 0.5, 0.8, 1000]

    return name, ppl, params