Python load_preprocessed_data_23_7示例，active_users.churn.preprocess.load_preprocessed_data_23_7 Python示例

示例#1

0

显示文件

文件： eval_model.py 项目： TheEighthDay/active_users

def cnn():
    """Train a small 2-D CNN on the preprocessed data and report test metrics.

    The data returned by ``load_preprocessed_data_23_7()`` is split 80/20
    into train and test sets, standardized, reshaped to ``(n, 7, 7, 1)``
    (so each sample must carry exactly 49 values), and fed to a
    Conv-Conv-Pool x2 network with a single sigmoid output unit.  The
    predictions for the held-out split are handed to ``metrics`` together
    with ``y_test``.

    Returns:
        None.  Results are reported by ``metrics`` (defined elsewhere).
    """
    # Keras is imported lazily so the rest of the module works without it.
    # Only the layers actually used are imported; in particular the old
    # ``keras.wrappers.scikit_learn`` module is gone from current Keras
    # releases and importing it would fail before any work is done.
    from keras.models import Sequential
    from keras.layers import Conv2D, Dense, Dropout, Flatten, MaxPooling2D

    x, y = load_preprocessed_data_23_7()
    x_train, x_test, y_train, y_test = train_test_split(x,
                                                        y,
                                                        test_size=0.2,
                                                        random_state=seed)

    # Fit the scaler on the training split only: fitting on the full data set
    # would leak test-set mean/variance into training and bias the evaluation.
    scaler = StandardScaler()
    scaler.fit(x_train)
    x_train = scaler.transform(x_train)
    x_test = scaler.transform(x_test)

    # Treat each 49-value sample as a single-channel 7x7 "image".
    x_train = x_train.reshape(x_train.shape[0], 7, 7, 1)
    x_test = x_test.reshape(x_test.shape[0], 7, 7, 1)

    model = Sequential()

    # Block 1: two 3x3 convolutions (32 filters), 2x2 pooling, dropout.
    model.add(
        Conv2D(32, (3, 3),
               input_shape=(7, 7, 1),
               activation='relu',
               padding='same'))
    model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Block 2: two 3x3 convolutions (64 filters), 2x2 pooling, dropout.
    model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(Conv2D(64, (3, 3), activation='relu', padding='same'))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # Dense head ending in one sigmoid unit.
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(0.5))

    model.add(Dense(1, activation='sigmoid'))

    # NOTE(review): sigmoid + binary cross-entropy presumes y holds 0/1
    # labels (or probabilities) -- confirm against the preprocessing code.
    model.compile(optimizer='adam',
                  loss='binary_crossentropy',
                  metrics=['accuracy'])
    print('begin')
    # A further 20% of the training split is held out by Keras for validation.
    model.fit(x_train,
              y_train,
              batch_size=30,
              epochs=10,
              validation_split=0.2,
              shuffle=True)
    result = model.predict(x_test)
    metrics(result, y_test)
    '''

示例#2

0

显示文件

文件： eval_model.py 项目： TheEighthDay/active_users

def knn():
    """Fit a standardized k-nearest-neighbours regressor and report metrics.

    Loads the preprocessed data, holds out 20% of it for testing, fits a
    ``StandardScaler`` -> ``KNeighborsRegressor`` pipeline on the rest and
    passes the test-set predictions plus ``y_test`` to ``metrics``.
    """
    features, targets = load_preprocessed_data_23_7()
    split = train_test_split(features,
                             targets,
                             test_size=0.2,
                             random_state=seed)
    train_x, test_x, train_y, test_y = split

    # Scaling lives inside the pipeline, so it is fitted on training data only.
    steps = [
        ('Scaler', StandardScaler()),
        ('KNN', KNeighborsRegressor()),
    ]
    pipeline = Pipeline(steps)
    pipeline.fit(train_x, train_y)

    predictions = pipeline.predict(test_x)
    metrics(predictions, test_y)
    '''

示例#3

0

显示文件

文件： eval_model.py 项目： TheEighthDay/active_users

def svr():
    """Fit a standardized support-vector regressor and report metrics.

    Loads the preprocessed data, keeps 20% aside as a test set, trains a
    ``StandardScaler`` -> ``SVR(verbose=True)`` pipeline on the remainder
    and hands the test-set predictions and ``y_test`` to ``metrics``.
    """
    data, labels = load_preprocessed_data_23_7()
    train_x, test_x, train_y, test_y = train_test_split(
        data, labels, test_size=0.2, random_state=seed)

    scaling_step = ('Scaler', StandardScaler())
    # verbose=True makes the underlying solver print its progress.
    regressor_step = ('SVM', SVR(verbose=True))
    estimator = Pipeline([scaling_step, regressor_step])

    estimator.fit(train_x, train_y)
    predicted = estimator.predict(test_x)
    metrics(predicted, test_y)
    '''

示例#4

0

显示文件

文件： eval_model.py 项目： TheEighthDay/active_users

def gbr():
    """Fit a scaled, PCA-transformed gradient-boosting regressor and report metrics.

    Loads the preprocessed data, reserves 20% for testing, trains a
    ``StandardScaler`` -> ``PCA`` -> ``GradientBoostingRegressor`` pipeline
    on the rest and passes the test-set predictions and ``y_test`` to
    ``metrics``.
    """
    data, labels = load_preprocessed_data_23_7()
    train_x, test_x, train_y, test_y = train_test_split(
        data, labels, test_size=0.2, random_state=seed)

    # Stage names are kept exactly as in the original pipeline definition.
    stages = []
    stages.append(('Scaler', StandardScaler()))
    stages.append(('pca', PCA(random_state=seed)))
    stages.append(('RBR', GradientBoostingRegressor(random_state=seed)))

    estimator = Pipeline(stages)
    estimator.fit(train_x, train_y)

    predicted = estimator.predict(test_x)
    metrics(predicted, test_y)
    '''

示例#5

0

显示文件

文件： eval_model.py 项目： TheEighthDay/active_users

def eval_ensemble_model():
    """Cross-validate several scaled ensemble regressors on the training split.

    The preprocessed data is split 80/20; only the training part is used.
    Each candidate is a ``StandardScaler`` followed by an ensemble regressor
    and is scored with ``cross_val_score`` using ``num_folds`` folds and the
    module-level ``scoring`` setting.  The mean and standard deviation of the
    fold scores are printed per model.

    Returns:
        list: one array of per-fold scores per model, in the order the
        models are defined below.
    """
    x, y = load_preprocessed_data_23_7()

    # The held-out test split is intentionally unused here; the split is still
    # performed so the training rows match those used by the other evaluators.
    x_train, _x_test, y_train, _y_test = train_test_split(x,
                                                          y,
                                                          test_size=0.2,
                                                          random_state=seed)

    ensembles = dict()
    ensembles['ScaledAB'] = Pipeline([('Scaler', StandardScaler()),
                                      ('AB',
                                       AdaBoostRegressor(random_state=seed))])
    # NOTE: an AdaBoost-over-KNeighborsRegressor variant ('ScaledAB-KNN') was
    # disabled in the original code and is left out here as well.
    ensembles['ScaledAB-LR'] = Pipeline([
        ('Scaler', StandardScaler()),
        ('ABLR', AdaBoostRegressor(LinearRegression(), random_state=seed))
    ])
    ensembles['ScaledRFR'] = Pipeline([
        ('Scaler', StandardScaler()),
        ('RFR', RandomForestRegressor(random_state=seed))
    ])
    ensembles['ScaledETR'] = Pipeline([
        ('Scaler', StandardScaler()),
        ('ETR', ExtraTreesRegressor(random_state=seed))
    ])
    ensembles['ScaledGBR'] = Pipeline([
        ('Scaler', StandardScaler()),
        ('RBR', GradientBoostingRegressor(random_state=seed))
    ])

    # KFold without shuffling ignores random_state, and recent scikit-learn
    # releases raise ValueError when it is supplied in that case.  Omitting it
    # yields the same consecutive folds; the rows were already shuffled by
    # train_test_split above.  The splitter is stateless, so build it once.
    kfold = KFold(n_splits=num_folds)

    results = []
    for key, pipeline in ensembles.items():
        cv_result = cross_val_score(pipeline,
                                    x_train,
                                    y_train,
                                    cv=kfold,
                                    scoring=scoring,
                                    n_jobs=4)
        results.append(cv_result)
        print('%s: %f (%f)' % (key, cv_result.mean(), cv_result.std()))
    return results

示例#6

0

显示文件

文件： eval_model.py 项目： TheEighthDay/active_users

def eval_model():
    """Cross-validate several scaled baseline models on the training split.

    The preprocessed data is split 80/20; only the training part is used.
    Each candidate is a ``StandardScaler`` followed by a single estimator
    (logistic/linear regression, Lasso, ElasticNet, decision tree, SVR) and
    is scored with ``cross_val_score`` using ``num_folds`` folds and the
    module-level ``scoring`` setting.  The mean and standard deviation of the
    fold scores are printed per model.

    Returns:
        list: one array of per-fold scores per model, in the order the
        models are defined below.
    """
    x, y = load_preprocessed_data_23_7()

    # The held-out test split is intentionally unused here; the split is still
    # performed so the training rows match those used by the other evaluators.
    x_train, _x_test, y_train, _y_test = train_test_split(x,
                                                          y,
                                                          test_size=0.2,
                                                          random_state=seed)

    pipelines = dict()
    # NOTE(review): LogisticRegression is a classifier evaluated with the same
    # `scoring` as the regressors -- confirm that metric suits both.
    pipelines['ScalerLR'] = Pipeline([('Scaler', StandardScaler()),
                                      ('LR',
                                       LogisticRegression(random_state=seed))])
    pipelines['ScalerLinearR'] = Pipeline([('Scaler', StandardScaler()),
                                           ('LR', LinearRegression())])
    pipelines['ScalerLASSO'] = Pipeline([('Scaler', StandardScaler()),
                                         ('LASSO', Lasso(random_state=seed))])
    pipelines['ScalerEN'] = Pipeline([('Scaler', StandardScaler()),
                                      ('EN', ElasticNet(random_state=seed))])
    # NOTE: a KNeighborsRegressor pipeline ('ScalerKNN') was disabled in the
    # original code and is left out here as well.
    pipelines['ScalerCART'] = Pipeline([
        ('Scaler', StandardScaler()),
        ('CART', DecisionTreeRegressor(random_state=seed))
    ])
    pipelines['ScalerSVM'] = Pipeline([('Scaler', StandardScaler()),
                                       ('SVM', SVR())])

    # KFold without shuffling ignores random_state, and recent scikit-learn
    # releases raise ValueError when it is supplied in that case.  Omitting it
    # yields the same consecutive folds; the rows were already shuffled by
    # train_test_split above.  The splitter is stateless, so build it once.
    kfold = KFold(n_splits=num_folds)

    results = []
    for key, pipeline in pipelines.items():
        cv_result = cross_val_score(pipeline,
                                    x_train,
                                    y_train,
                                    cv=kfold,
                                    scoring=scoring,
                                    n_jobs=4)
        results.append(cv_result)
        print('%s: %f (%f)' % (key, cv_result.mean(), cv_result.std()))
    return results