def main():
    """Fit a linear regression on the training split and print its test RMSE.

    The target column is 'median_house_value'. Feature preparation is
    delegated to ``sklearn_feature_union.prepare_data`` for both splits.
    """
    train_df, test_df = load_data.split_train_and_test_set()

    # Training data: presumably the named column is separated out as the
    # target and the remaining columns are the features -- confirm against
    # load_data.split_dataframe_column.
    train_features, train_target = load_data.split_dataframe_column(
        train_df, 'median_house_value')
    CombinedAttributesAdder.compute_index(train_features)
    train_matrix = sklearn_feature_union.prepare_data(train_features)

    # Train
    model = LinearRegression()
    model.fit(train_matrix, train_target)

    # Evaluate on the held-out split using the same preparation helper.
    test_features, test_target = load_data.split_dataframe_column(
        test_df, 'median_house_value')
    test_matrix = sklearn_feature_union.prepare_data(test_features)
    predicted = model.predict(test_matrix)

    rmse = np.sqrt(mean_squared_error(test_target, predicted))
    print('root_mean_squared_error = {}'.format(rmse))
def main():
    """Grid-search a random forest regressor and report its test-set RMSE.

    Runs a 5-fold ``GridSearchCV`` scored by negative mean squared error
    over two hyper-parameter grids, then evaluates the best estimator on
    the test split and prints the best parameters, the RMSE, and the first
    five predictions next to the first five labels.
    """
    train_df, test_df = load_data.split_train_and_test_set()

    train_features, train_target = load_data.split_dataframe_column(
        train_df, 'median_house_value')
    CombinedAttributesAdder.compute_index(train_features)
    train_matrix = sklearn_feature_union.prepare_data(train_features)

    # Train: the first grid leaves `bootstrap` at the estimator's default,
    # the second one explicitly sets it to False.
    default_grid = {
        'n_estimators': [3, 10, 30],
        'max_features': [2, 4, 6, 8],
    }
    no_bootstrap_grid = {
        'bootstrap': [False],
        'n_estimators': [3, 10],
        'max_features': [2, 3, 4],
    }
    search = GridSearchCV(
        RandomForestRegressor(),
        [default_grid, no_bootstrap_grid],
        cv=5,
        scoring='neg_mean_squared_error',
    )
    search.fit(train_matrix, train_target)

    # Evaluate the best estimator found on the held-out split.
    test_features, test_target = load_data.split_dataframe_column(
        test_df, 'median_house_value')
    test_matrix = sklearn_feature_union.prepare_data(test_features)
    predicted = search.best_estimator_.predict(test_matrix)
    rmse = np.sqrt(mean_squared_error(test_target, predicted))

    print('Best params = {}'.format(search.best_params_))
    print('root_mean_squared_error = {}'.format(rmse))
    print('Prediction: {}'.format(predicted[:5]))
    print('Label: {}'.format(list(test_target.iloc[:5])))
def main():
    """Run the numeric preprocessing pipeline on the training split and print it.

    The pipeline chains median imputation, ``CombinedAttributesAdder`` and
    standard scaling; the fitted-and-transformed result is printed.
    """
    train_df, _ = load_data.split_train_and_test_set()

    # Presumably this removes the 'ocean_proximity' column so that only
    # numeric columns remain -- confirm against load_data.
    numeric_features, _ = load_data.split_dataframe_column(
        train_df, 'ocean_proximity')
    CombinedAttributesAdder.compute_index(numeric_features)

    # NOTE(review): if `Imputer` is sklearn.preprocessing.Imputer, newer
    # scikit-learn releases replace it with sklearn.impute.SimpleImputer --
    # confirm against the file's imports.
    steps = [
        ('imputer', Imputer(strategy="median")),
        ('attribs_adder', CombinedAttributesAdder()),
        ('std_scaler', StandardScaler()),
    ]
    numeric_pipeline = Pipeline(steps)

    print(numeric_pipeline.fit_transform(numeric_features))
def main():
    """Cross-validate a decision tree regressor and print its RMSE scores.

    Runs 10-fold cross-validation scored by negative mean squared error on
    the prepared training data, converts the scores to RMSE values, and
    prints them with their mean and standard deviation.
    """
    def report(values):
        """Print the values followed by their mean and standard deviation."""
        print("Scores:", values)
        print("Mean:", values.mean())
        print("Standard deviation:", values.std())

    # The test split is unpacked but not used here.
    train_df, test_df = load_data.split_train_and_test_set()
    features, target = load_data.split_dataframe_column(
        train_df, 'median_house_value')
    CombinedAttributesAdder.compute_index(features)
    prepared = sklearn_feature_union.prepare_data(features)

    # Train
    regressor = DecisionTreeRegressor()
    neg_mse_scores = cross_val_score(
        regressor,
        prepared,
        target,
        scoring="neg_mean_squared_error",
        cv=10,
    )

    # Scores are negated MSE values, so flip the sign before the root.
    rmse_scores = np.sqrt(-neg_mse_scores)
    report(rmse_scores)