Exemplo n.º 1
0
    # Build the feature matrix by joining the three feature frames
    # column-wise (axis=1).
    X = pd.concat([X_cat_dummies, X_num, X_survey], axis=1)

    # In TEST mode, shrink X and ys to roughly the first 10% of rows.
    # NOTE(review): np.floor returns a float, and .loc slices by label and
    # includes the end label, so this keeps test_size + 1 rows and assumes a
    # default 0..n-1 integer index — confirm this is intended.
    if TEST == True:
        n = X.shape[0]
        test_size = np.floor(n * 0.1)
        X = X.loc[0:test_size, :]
        ys = ys.loc[0:test_size, :]

    # Scores collected per target, keyed '<model>_<train|test>_<target>'.
    all_scores = {}
    for dependent_variable in y_variables:
        print('Running variables: ' + dependent_variable)
        # Target column for this iteration, as a raw array.
        y = ys[dependent_variable].values

        print('Running Linear Regression')
        # Linear Regression
        # NOTE(review): `dummies`, `num_demo`, `data_sets` and `key` are
        # defined outside this excerpt; their meaning cannot be confirmed here.
        lr_cv_train_scores, lr_cv_test_scores = models.lr_model(
            X, y, dummies, num_demo + data_sets[key])

        all_scores['lr_train_' + dependent_variable] = lr_cv_train_scores
        all_scores['lr_test_' + dependent_variable] = lr_cv_test_scores

        print('Running Random Forest')
        # Random Forest

        # TEST mode uses a grid with a single value per hyper-parameter
        # (presumably to keep the run short — confirm).
        if TEST == True:
            param_grid = {
                'max_depth': [15],
                'min_samples_leaf': [20],
                'min_samples_split': [20],
                'n_estimators': [500]
            }
        # The non-TEST branch continues beyond this excerpt.
        else:
    # Report the selector's results: the number of selected features and the
    # ranking it assigned to each feature (the printed labels are in Chinese).
    print("被筛选的特征数量", selector.n_features_)
    print("特征排名", selector.ranking_)
    columns = x_train.columns
    # Names of the columns whose entry in the selector's support mask is truthy.
    selects = [columns[i] for i, j in enumerate(selector.support_) if j]
    # Column names handed to cat_boost_model via its cat_features keyword.
    # NOTE(review): nothing here checks that these names are in `selects`.
    cat_features = [
        "is_festival_user",
        "is_LAST_2YEAR_DD_ACTIVE",
        "cafe_tag_is_mop_available",
        "IS_SR_KIT_USER",
    ]
    # Restrict the train, test and df_btest frames to the selected columns.
    x_train = x_train[selects]
    x_test = x_test[selects]
    x_btest = df_btest[selects]

    # Call each model helper with the same six arguments: train/test features,
    # train/test targets, the df_btest features and the df_btest label column(s).
    # The helpers (including the name `gbdt_mdoel`) are defined outside this
    # excerpt.
    adaboost_model(x_train, x_test, y_train, y_test, x_btest, df_btest[labels])
    lr_model(x_train, x_test, y_train, y_test, x_btest, df_btest[labels])
    gbdt_mdoel(x_train, x_test, y_train, y_test, x_btest, df_btest[labels])

    xgb_model(x_train, x_test, y_train, y_test, x_btest, df_btest[labels])
    lgb_model(x_train, x_test, y_train, y_test, x_btest, df_btest[labels])
    # Only this helper additionally receives the cat_features list.
    cat_boost_model(x_train,
                    x_test,
                    y_train,
                    y_test,
                    x_btest,
                    df_btest[labels],
                    cat_features=cat_features)

    # from sklearn.feature_selection import RFECV
    # x = df_train.copy()
    # clf1 = RandomForestClassifier()
Exemplo n.º 3
0
    # x1 = X_train_tra[X_train_tra[labels] == 1]
    # x0 = x0.sample(n=None, frac=0.33, replace=False, weights=None, random_state=0, axis=0)
    # X_train_tra = pd.concat([x0, x1], axis=0)

    # Split off the label: seperate_label is called with label=labels and its
    # result is unpacked into a (features, target) pair for each frame.
    x_train, y_train = seperate_label(X_train_tra, label=labels)
    x_test, y_test = seperate_label(X_test_tra, label=labels)
    print("x_train", x_train.shape)

    # Model training. Every helper below receives the same six arguments: the
    # train/test features and targets, then df_btest with the label column(s)
    # dropped, and the df_btest label column(s) on their own.
    # AdaBoost model
    adaboost_model(x_train, x_test, y_train, y_test,
                   df_btest.drop(labels, axis=1), df_btest[labels])

    # LR model
    lr_model(x_train, x_test, y_train, y_test, df_btest.drop(labels, axis=1),
             df_btest[labels])
    # RF model
    rf_mdoel(x_train, x_test, y_train, y_test, df_btest.drop(labels, axis=1),
             df_btest[labels])
    # # #
    # GBDT model
    gbdt_mdoel(x_train, x_test, y_train, y_test, df_btest.drop(labels, axis=1),
               df_btest[labels])

    # xgb model
    xgb_model(x_train, x_test, y_train, y_test, df_btest.drop(labels, axis=1),
              df_btest[labels])

    # lgb model
    lgb_model(x_train,
              x_test,