示例#1
0
def main():
    """Fit a linear regression on the training split and print test RMSE.

    The target column ``'median_house_value'`` is separated from both the
    training and the test split; features go through
    ``sklearn_feature_union.prepare_data`` before fitting and predicting.
    """
    target_column = 'median_house_value'
    train_df, test_df = load_data.split_train_and_test_set()

    # --- Training -------------------------------------------------------
    train_features, train_target = load_data.split_dataframe_column(
        train_df, target_column)
    # Return value is ignored; called before the features are prepared.
    CombinedAttributesAdder.compute_index(train_features)
    train_matrix = sklearn_feature_union.prepare_data(train_features)

    model = LinearRegression()
    model.fit(train_matrix, train_target)

    # --- Evaluation -----------------------------------------------------
    test_features, test_target = load_data.split_dataframe_column(
        test_df, target_column)
    # NOTE(review): prepare_data is invoked again on the test features; if
    # it re-fits its transformers on every call, the test data is scaled
    # with its own statistics rather than the training ones -- confirm.
    test_matrix = sklearn_feature_union.prepare_data(test_features)
    predicted = model.predict(test_matrix)

    rmse = np.sqrt(mean_squared_error(test_target, predicted))
    print('root_mean_squared_error = {}'.format(rmse))
def main():
    """Grid-search a random forest and print its test-set results.

    Runs a 5-fold ``GridSearchCV`` (scored by negative MSE) over two
    hyper-parameter grids on the training split, then prints the best
    parameters, the RMSE of the best estimator on the test split, and the
    first five predictions next to the first five labels.
    """
    target_column = 'median_house_value'
    train_df, test_df = load_data.split_train_and_test_set()

    train_features, train_target = load_data.split_dataframe_column(
        train_df, target_column)
    # Return value is ignored; called before the features are prepared.
    CombinedAttributesAdder.compute_index(train_features)
    train_matrix = sklearn_feature_union.prepare_data(train_features)

    # --- Training -------------------------------------------------------
    # Two grids are searched: one leaving `bootstrap` at the estimator's
    # default, one with it explicitly switched off.
    default_grid = {
        'n_estimators': [3, 10, 30],
        'max_features': [2, 4, 6, 8]
    }
    no_bootstrap_grid = {
        'bootstrap': [False],
        'n_estimators': [3, 10],
        'max_features': [2, 3, 4]
    }
    searcher = GridSearchCV(RandomForestRegressor(),
                            [default_grid, no_bootstrap_grid],
                            cv=5,
                            scoring='neg_mean_squared_error')
    searcher.fit(train_matrix, train_target)

    # --- Evaluation -----------------------------------------------------
    test_features, test_target = load_data.split_dataframe_column(
        test_df, target_column)
    # NOTE(review): prepare_data is invoked again on the test features; if
    # it re-fits its transformers on every call, the test data is scaled
    # with its own statistics rather than the training ones -- confirm.
    test_matrix = sklearn_feature_union.prepare_data(test_features)
    predicted = searcher.best_estimator_.predict(test_matrix)
    rmse = np.sqrt(mean_squared_error(test_target, predicted))

    print('Best params = {}'.format(searcher.best_params_))
    print('root_mean_squared_error = {}'.format(rmse))
    print('Prediction: {}'.format(predicted[:5]))
    print('Label: {}'.format(list(test_target.iloc[:5])))
示例#3
0
def main():
    """Run the numeric preprocessing pipeline on the training split.

    Splits the ``'ocean_proximity'`` column off the training set, pushes
    the remaining columns through a median imputer, the combined-attributes
    adder and a standard scaler, and prints the transformed result.
    """
    train_df, _ = load_data.split_train_and_test_set()
    numeric_features, _ = load_data.split_dataframe_column(
        train_df, 'ocean_proximity')
    # Return value is ignored; called before the pipeline is built.
    CombinedAttributesAdder.compute_index(numeric_features)

    # NOTE(review): if `Imputer` is sklearn.preprocessing.Imputer, it was
    # removed in scikit-learn 0.22 in favour of sklearn.impute.SimpleImputer
    # -- confirm against the file's imports and the pinned version.
    steps = [
        ('imputer', Imputer(strategy="median")),
        ('attribs_adder', CombinedAttributesAdder()),
        ('std_scaler', StandardScaler()),
    ]
    transformed = Pipeline(steps).fit_transform(numeric_features)

    print(transformed)
def main():
    """Cross-validate a decision tree regressor and print its RMSE scores.

    Runs 10-fold cross-validation on the prepared training features with
    negative-MSE scoring, converts the scores to RMSE, and prints the
    per-fold values together with their mean and standard deviation.
    """
    # Only the training split is used here; the test split is discarded.
    train_df, _ = load_data.split_train_and_test_set()
    features, target = load_data.split_dataframe_column(
        train_df, 'median_house_value')
    # Return value is ignored; called before the features are prepared.
    CombinedAttributesAdder.compute_index(features)
    feature_matrix = sklearn_feature_union.prepare_data(features)

    # --- Training -------------------------------------------------------
    neg_mse_per_fold = cross_val_score(DecisionTreeRegressor(),
                                       feature_matrix,
                                       target,
                                       scoring="neg_mean_squared_error",
                                       cv=10)
    # The scorer yields negated MSE, so flip the sign before the root.
    rmse_per_fold = np.sqrt(-neg_mse_per_fold)

    print("Scores:", rmse_per_fold)
    print("Mean:", rmse_per_fold.mean())
    print("Standard deviation:", rmse_per_fold.std())