def wine_quality_prediction(selection_method, num_features):
    """Fit a linear regression on selected wine-quality features and score it.

    Loads the wine-quality train/test split, picks a feature subset with the
    requested selection strategy, fits ``LinearRegression`` on those columns
    and evaluates it on the test split.

    Args:
        selection_method: ``'forward'`` or ``'backward'``; chooses between
            ``forward_feature_selection`` and
            ``backward_feature_selection_p_value``.
        num_features: Forwarded unchanged to the selection helper
            (presumably the number of features to keep -- confirm against
            the helper).

    Returns:
        A ``(mean_square_error, fit_time_ms)`` tuple: the test-split mean
        squared error and the duration of ``model.fit`` in milliseconds.

    Raises:
        ValueError: If ``selection_method`` is not a supported strategy.
    """
    # Fail fast, before any data is loaded: an unknown method would otherwise
    # leave ``feature_set`` unbound and surface later as an UnboundLocalError.
    if selection_method not in ('forward', 'backward'):
        raise ValueError(
            "selection_method must be 'forward' or 'backward', got %r"
            % (selection_method,))

    X_train, X_test, y_train, y_test = prepare_wine_quality_data()

    if selection_method == 'forward':
        select = forward_feature_selection
    else:
        select = backward_feature_selection_p_value
    feature_set = select(X_train, X_test, y_train, y_test, num_features)

    model = LinearRegression()

    # Only the fit is timed. perf_counter() is monotonic and high-resolution,
    # unlike time.time(), which can be too coarse for a sub-millisecond fit.
    start = time.perf_counter()
    model.fit(X_train[:, feature_set], y_train)
    fit_time_ms = (time.perf_counter() - start) * 1000
    print(fit_time_ms)

    y_hat = model.predict(X_test[:, feature_set])
    mean_square_error = mean_squared_error(y_test, y_hat)

    return mean_square_error, fit_time_ms
# Example no. 2
# 0
def prediction(selection_method, num_features):
    """Fit a linear regression on selected million-song features and score it.

    Loads the million-song train/test split, picks a feature subset with the
    requested selection strategy, fits ``LinearRegression`` on those columns
    and scores the test-split predictions with ``calc_accuracy``.

    Args:
        selection_method: ``'forward'`` or ``'backward'``; chooses between
            ``forward_feature_selection`` and
            ``backward_feature_selection_p_value``.
        num_features: Forwarded unchanged to the selection helper
            (presumably the number of features to keep -- confirm against
            the helper).

    Returns:
        A ``(mean_error, fit_time_ms)`` tuple: whatever ``calc_accuracy``
        returns for the test-split predictions, and the duration of
        ``model.fit`` in milliseconds. Both values are also printed.

    Raises:
        ValueError: If ``selection_method`` is not a supported strategy.
    """
    # Fail fast, before any data is loaded: an unknown method would otherwise
    # leave ``feature_set`` unbound and surface later as an UnboundLocalError.
    if selection_method not in ('forward', 'backward'):
        raise ValueError(
            "selection_method must be 'forward' or 'backward', got %r"
            % (selection_method,))

    X_train, X_test, y_train, y_test = prepare_million_song_data()

    if selection_method == 'forward':
        select = forward_feature_selection
    else:
        select = backward_feature_selection_p_value
    feature_set = select(X_train, X_test, y_train, y_test, num_features)

    model = LinearRegression()

    # Only the fit is timed. perf_counter() is monotonic and high-resolution,
    # unlike time.time(), which can be too coarse for short fits.
    start = time.perf_counter()
    model.fit(X_train[:, feature_set], y_train)
    fit_time_ms = (time.perf_counter() - start) * 1000
    print(fit_time_ms)

    y_test_pred = model.predict(X_test[:, feature_set])

    mean_error = calc_accuracy(y_test, y_test_pred)
    print(mean_error)

    return mean_error, fit_time_ms
def prediction(selection_method, num_features=8):
    """Predict wine quality for the separate test-data set.

    Selects a feature subset using the wine-quality train/test split, fits
    ``LinearRegression`` on the training columns, then predicts for the data
    returned by ``prepare_wine_quality_test_data``.

    Args:
        selection_method: ``'forward'`` or ``'backward'``; chooses between
            ``forward_feature_selection`` and
            ``backward_feature_selection_p_value``.
        num_features: Forwarded unchanged to the selection helper
            (presumably the number of features to keep -- confirm against
            the helper). Defaults to 8, the value that used to be
            hard-coded here.

    Returns:
        An ``(ids, predictions)`` tuple: the identifiers returned by
        ``prepare_wine_quality_test_data`` and the model's predictions for
        the accompanying feature matrix.

    Raises:
        ValueError: If ``selection_method`` is not a supported strategy.
    """
    # Fail fast, before any data is loaded: an unknown method would otherwise
    # leave ``feature_set`` unbound and surface later as an UnboundLocalError.
    if selection_method not in ('forward', 'backward'):
        raise ValueError(
            "selection_method must be 'forward' or 'backward', got %r"
            % (selection_method,))

    # The held-out split is only used to drive feature selection.
    X_train, X_holdout, y_train, y_holdout = prepare_wine_quality_data()

    if selection_method == 'forward':
        select = forward_feature_selection
    else:
        select = backward_feature_selection_p_value
    feature_set = select(X_train, X_holdout, y_train, y_holdout, num_features)

    # Separate names keep this data distinct from the held-out split above.
    X_unlabelled, ids = prepare_wine_quality_test_data()

    model = LinearRegression()
    model.fit(X_train[:, feature_set], y_train)

    predictions = model.predict(X_unlabelled[:, feature_set])

    return ids, predictions
# Example no. 4
# 0
def optimal_features(X_train, X_test, y_train, y_test,
                     feature_selection_method):
    """Sweep the feature-subset size and record test error and BIC for each.

    For every subset size from 2 up to the number of columns in ``X_train``,
    a feature subset is chosen with the requested strategy, a
    ``LinearRegression`` is fitted on those training columns, and the
    test-split mean squared error and BIC are recorded (and printed).

    Args:
        X_train: Training feature matrix; indexed as ``X_train[:, cols]``,
            so it must support 2-D column indexing.
        X_test: Test feature matrix, indexed the same way.
        y_train: Training targets.
        y_test: Test targets.
        feature_selection_method: ``'forward'`` or ``'backward'``; chooses
            between ``forward_feature_selection`` and
            ``backward_feature_selection``.

    Returns:
        A ``(num_feature_list, error_list, bic_list)`` tuple of parallel
        lists: the size of each selected feature set, its test-split mean
        squared error, and its BIC.

    Raises:
        ValueError: If ``feature_selection_method`` is not a supported
            strategy.
    """
    # Fail fast: an unknown method would otherwise leave ``feature_set``
    # unbound and surface inside the loop as an UnboundLocalError.
    if feature_selection_method not in ('forward', 'backward'):
        raise ValueError(
            "feature_selection_method must be 'forward' or 'backward', "
            "got %r" % (feature_selection_method,))

    num_features = len(X_train[0])
    n_samples = len(y_test)
    model = LinearRegression()

    num_feature_list = []
    error_list = []
    bic_list = []

    for num in range(2, num_features + 1):

        if feature_selection_method == 'forward':
            feature_set = forward_feature_selection(X_train, X_test, y_train,
                                                    y_test, num)
        else:
            feature_set = backward_feature_selection(X_train, X_test, y_train,
                                                     y_test, num)

        # fit the model on training given the feature set
        model.fit(X_train[:, feature_set], y_train)

        mean_square_error = mean_squared_error(
            y_test, model.predict(X_test[:, feature_set]))

        # BIC for a Gaussian-error regression: n * ln(SSE / n) + k * ln(n).
        # SSE / n is exactly the mean squared error. The previous
        # ``k*ln(n) - 2*ln(SSE)`` used SSE in place of the likelihood, so a
        # better fit produced a *higher* (worse) BIC.
        if mean_square_error > 0:
            fit_term = n_samples * math.log(mean_square_error)
        else:
            # A perfect fit has zero error; avoid math.log(0) raising.
            fit_term = float('-inf')
        bic = fit_term + num * math.log(n_samples)

        print(feature_set, ' :: ', mean_square_error)
        print(feature_set, ' :: ', bic)

        num_feature_list.append(len(feature_set))
        error_list.append(mean_square_error)
        bic_list.append(bic)

    return num_feature_list, error_list, bic_list