def test_run():
    """Fit each candidate classifier and report accuracy and binary ROC-AUC.

    Uses the module-level rfc/dtc estimators and x/y train-test splits.
    """
    for model in [rfc, dtc]:
        print(model_name(model))
        model.fit(x_train, y_train)
        print('score in and out of sample',
              model.score(x_train, y_train),
              model.score(x_test, y_test))
        # Probability of the positive class (column 1) for binary AUC.
        positive_proba = model.predict_proba(x_test)[:, 1]
        print(roc_auc_score(y_test, positive_proba))
def test_run():
    """Fit each candidate model and report accuracy plus per-class ROC-AUC.

    Uses the module-level rfc/dtc/logit estimators, x/y train-test splits
    and y2_test (presumably label-binarized targets matching the full
    predict_proba matrix — confirm against the caller).
    """
    for m in [rfc, dtc, logit]:
        print(model_name(m))
        m.fit(x_train, y_train)
        print(m.classes_)
        print('score in and out of sample',
              m.score(x_train, y_train),
              m.score(x_test, y_test))
        # Full probability matrix (one column per class).
        y_proba = m.predict_proba(x_test)
        # FIX: `average` must be passed by keyword — scikit-learn deprecated
        # positional arguments after (y_true, y_score) in 0.23 and removed
        # them in 1.0, so the original positional `None` raises TypeError.
        # average=None yields one AUC per class instead of an aggregate.
        print(roc_auc_score(y2_test, y_proba, average=None))
def test_run():
    """Cross-validate each model under several scoring schemes, printing
    one fold-score array per (model, scoring) pair."""
    scorers = (
        "accuracy",
        accuracy_at_proba(1 / 3),
        coverage_at_proba(1 / 3),
        "avg_roc_auc_macro",
        "avg_roc_auc_micro",
    )
    for estimator in [rfc, dtc, logit]:
        print(model_name(estimator))
        for scorer in scorers:
            print(cross_val_score(estimator, x, y, scoring=scorer, cv=cv))
def test_run():
    """Run a randomized hyper-parameter search for every configured model.

    Each entry of the module-level `models` is an (estimator, n_iter,
    param-distributions) triple; prints the model name and best CV score.
    """
    for estimator, n_iter, param_dist in models:
        search = RandomizedSearchCV(
            estimator,
            param_distributions=param_dist,
            cv=cvs.split(x),
            verbose=1,
            n_iter=n_iter,
        )
        search.fit(x, y)
        print(model_name(estimator), search.best_score_)
def get_pipeline(est, is_tree, is_regressor, params):
    """Assemble a preprocessing Pipeline suited to the estimator type.

    Parameters
    ----------
    est : estimator placed at the final ('mo') pipeline step.
    is_tree : truthy when the estimator is tree-based (selects the branch
        that skips scaling and uses ordinal/sentinel encoding).
    is_regressor : forwarded to get_selector() to pick the score function.
    params : dict of search-space entries; NOTE it is mutated in place with
        the pipeline-prefixed preprocessing parameters and also returned.

    Returns
    -------
    tuple : (model name, assembled Pipeline, augmented params dict).
    """
    name = model_name(est)
    if name.startswith('Dummy'):
        # Dummy baselines: no real preprocessing, just keep numeric columns.
        ppl = Pipeline([
            ('ft', FunctionTransformer()),
            ('mo', est)
        ])
        params['ft__func'] = [lambda x: x[numeric_cols(x)]]
        params['ft__validate'] = [False]
    elif is_tree:
        # Tree branch: ordinal-encode categoricals, fill NaNs with a
        # sentinel value instead of imputing/scaling, then select features.
        ppl = Pipeline([
            ('da', DateEncoder()),
            ('du', OrdinalEncoder()),
            ('ft', FunctionTransformer()),
            ('se', SelectKBest2()),
            ('mo', est)
        ])
        params['da__ascategory'] = [False]
        params['du__drop_invariant'] = [True]
        params['ft__func'] = [lambda x: x.fillna(-999)]
        params['ft__validate'] = [False]
        params['se__score_func'] = get_selector(is_regressor, is_tree)
        # FIX: was [0.2, 0.5, 0.8, 1000, 1000] — the duplicated 1000 looked
        # accidental (every other k-grid uses a single 1000) and silently
        # doubled its sampling weight in a randomized search.
        params['se__k'] = [0.2, 0.5, 0.8, 1000]
    else:
        # Non-tree branch: impute + scale numeric columns, sparse-encode
        # object columns, then union feature selection with SVD reduction.
        ppl = Pipeline([
            ('da', DateEncoder()),
            ('en', FeatureUnion([
                ('nu', Pipeline([('ft', FunctionTransformer()),
                                 ('in', Imputer()),
                                 ('sc', TransformerWrap(StandardScaler()))])),
                ('ca', Pipeline([('ft', FunctionTransformer()),
                                 ('sc', SparseCatEncoder())]))
            ])),
            ('fu', FeatureUnion([('se', SelectKBest2()),
                                 ('dr', TruncatedSVD2())])),
            ('mo', est)
        ])
        params['en__nu__ft__func'] = [lambda x: x[numeric_cols(x)]]
        params['en__nu__ft__validate'] = [False]
        params['en__ca__ft__func'] = [lambda x: x[object_cols(x)]]
        params['en__ca__ft__validate'] = [False]
        params['fu__se__score_func'] = get_selector(is_regressor, is_tree)
        params['fu__se__k'] = [0.2, 0.5, 0.8, 1000]
        params['fu__dr__k'] = [0.2, 0.5, 0.8, 1000]
    return name, ppl, params