示例#1
0
def main():
    """Run the churn modelling workflow on the train/test CSV files.

    Each CSV is cleaned twice through ``DataCleaning``: once with the
    default settings and once with ``regression=True``.  A ``Pipeline`` of
    ``RandomForestClassifier`` and ``GradientBoostingClassifier`` is fitted
    on the default training data, and a second ``Pipeline`` holding only
    ``LogisticRegression`` is fitted on the ``regression=True`` training
    data.  ``print_cv_results`` is called on both pipelines, both are handed
    to ``plot_rocs``, and the first pipeline is finally scored on the
    default-cleaned test data.
    """
    train_path, test_path = "../data/churn_train.csv", "../data/churn_test.csv"

    # Default cleaning pass over both files.
    cleaner_train, cleaner_test = DataCleaning(train_path), DataCleaning(test_path)
    X_train, y_train = cleaner_train.clean()
    X_test, y_test = cleaner_test.clean()

    # Second pass with regression=True; the cleaned test split from this
    # pass is not referenced again below.
    cleaner_train_reg, cleaner_test_reg = (
        DataCleaning(train_path),
        DataCleaning(test_path),
    )
    X_train_reg, y_train_reg = cleaner_train_reg.clean(regression=True)
    cleaner_test_reg.clean(regression=True)

    feature_names = cleaner_train.get_column_names()
    feature_names_reg = cleaner_train_reg.get_column_names()

    # The model classes themselves (not instances) are handed to Pipeline.
    forest_cls, boosting_cls, logistic_cls = (
        RandomForestClassifier,
        GradientBoostingClassifier,
        LogisticRegression,
    )

    tree_pipe = Pipeline([forest_cls, boosting_cls])
    tree_pipe.fit_predict(X_train, y_train)
    tree_pipe.print_cv_results(feature_names, X_train, y_train)

    logistic_pipe = Pipeline([logistic_cls])
    logistic_pipe.fit_predict(X_train_reg, y_train_reg)
    logistic_pipe.print_cv_results(feature_names_reg, X_train_reg, y_train_reg)

    training_sets = [[X_train, y_train], [X_train_reg, y_train_reg]]
    plot_rocs([tree_pipe, logistic_pipe], training_sets)

    # The returned score is not used any further by this function.
    tree_pipe.score(X_test, y_test)
def main():
    """Fit a gradient-boosting pipeline on the JSON data and pickle a model.

    Cleans ``data/data.json`` through ``DataCleaning``, fits a ``Pipeline``
    containing only ``GradientBoostingClassifier`` on the result, calls
    ``print_cv_results`` on it, and serialises ``pipe.trained_models[1]``
    to ``model.pkl`` in the current working directory.

    Only the training file is used; no separate test set is loaded or
    scored here.
    """
    train_path = "data/data.json"
    dc_train = DataCleaning(train_path)
    X_train, y_train = dc_train.clean()
    train_col_names = dc_train.get_column_names()

    # The model class itself (not an instance) is handed to Pipeline.
    pipe = Pipeline([GradientBoostingClassifier])
    pipe.fit_predict(X_train, y_train)
    pipe.print_cv_results(train_col_names, X_train, y_train)

    # pickle emits bytes, so the target file must be opened in binary mode;
    # text mode ('w') fails on Python 3 and can corrupt the file elsewhere.
    with open('model.pkl', 'wb') as f:
        # NOTE(review): index 1 is kept from the original, but the pipeline
        # above holds a single model class -- confirm against Pipeline how
        # trained_models is laid out and whether index 0 was intended.
        pickle.dump(pipe.trained_models[1], f)