def __build_model(self, var_idx, method='forward'):
    """Fit a linear model on a candidate feature subset and score it.

    The candidate subset is derived from ``self.__best_features``:

    * ``'forward'``  -- the current best features plus ``var_idx``;
    * ``'backward'`` -- a copy of the current best features with
      ``var_idx`` removed.

    Args:
        var_idx: Column index into ``self.X`` of the feature being added
            (forward) or dropped (backward).
        method: Either ``'forward'`` or ``'backward'``.

    Returns:
        The value of ``calculate_r2(self.y, y_preds)``, where ``y_preds``
        holds one prediction per row of the selected columns.

    Raises:
        ValueError: If ``method`` is neither ``'forward'`` nor
            ``'backward'``, or (raised by ``list.remove``) if ``var_idx``
            is not among the current best features in backward mode.
    """
    if method == 'forward':
        candidate_features = self.__best_features + [var_idx]
    elif method == 'backward':
        # Work on a copy so the stored best-feature list is not mutated.
        candidate_features = deepcopy(self.__best_features)
        candidate_features.remove(var_idx)
    else:
        # An unknown method used to fall through both branches and fail
        # further down with an UnboundLocalError on candidate_features.
        raise ValueError(
            "method must be 'forward' or 'backward', got {!r}".format(method))

    linear_reg = LinearRegression(gradient_descent=False)
    X = self.X[:, candidate_features]
    linear_reg.fit(X, self.y)
    # predict() is invoked once per row of the selected columns.
    y_preds = [linear_reg.predict(x) for x in X]
    return calculate_r2(self.y, y_preds)
def run(self):
    """Fit a linear regression on the loaded data and return its error.

    Loads the data through ``self.__readData()``, fits a fresh
    ``LinearRegression``, shows a scatter plot of the targets against the
    per-row model outputs, and returns ``model.error(predictions, targets)``.
    """
    features, targets = self.__readData()

    regressor = LinearRegression()
    regressor.fit(features, targets)

    predictions = []
    for sample in features:
        predictions.append(regressor.sumForRow(sample))

    # this plot is just to make sure we get values of a linear function
    plt.scatter(targets, predictions, c='r')
    plt.show()

    return regressor.error(predictions, targets)
def build_models(self, df):
    """Fit a linear model for every non-empty feature subset and summarise them.

    The last column of ``df`` is used as the target; every other column is
    a candidate feature. For each subset size ``k`` and each combination of
    ``k`` feature columns, a ``LinearRegression`` is fitted and scored via
    ``self.__calculate_criterions``. Because every combination is tried,
    the number of fitted models grows exponentially with the column count.

    Args:
        df: ``pandas.DataFrame`` whose last column is the response.

    Side effects:
        Sets ``self.n``, ``self.p`` (the shape of ``df``), ``self.y`` (the
        target as an array) and ``self.models_summary`` (one row per
        fitted subset), then calls
        ``self.__visualize_best_subset_performance()``.
    """
    self.n, self.p = df.shape

    # The candidate columns and the target are the same for every subset,
    # so extract them once instead of on every inner-loop iteration.
    feature_names = df.columns[:-1]
    self.y = np.asarray(df.iloc[:, -1])

    performances = []
    for k in range(1, self.p):
        for var_combo in itertools.combinations(feature_names, k):
            linear_reg = LinearRegression()
            X = np.asarray(df[list(var_combo)])
            linear_reg.fit(X, self.y)
            # predict() is invoked once per row of the subset matrix.
            y_preds = [linear_reg.predict(x) for x in X]
            adj_r2, aic, bic, r2, rss = self.__calculate_criterions(
                y_preds, k)
            performances.append([var_combo, k, aic, bic, rss, r2, adj_r2])

    col_names = [
        'subset', 'num_of_variables', 'aic', 'bic', 'rss', 'r2', 'adj_r2'
    ]
    self.models_summary = pd.DataFrame(performances, columns=col_names)
    self.__visualize_best_subset_performance()
def main():
    """Demo: fit a linear regression on synthetic 1-D data and plot it.

    Generates 100 noisy samples with ``make_regression``, splits them with
    ``test_size=0.4``, fits ``LinearRegression(n_iterations=100)`` on the
    training part, and then:

    1. plots ``model.training_errors`` against the iteration index;
    2. prints the mean squared error on the test part;
    3. plots the training/test points together with the fitted line.

    NOTE(review): the axis labels ('Day', 'Temperature in Celcius') and the
    ``366 *`` scaling of the x-axis look like leftovers from a temperature
    data set, while the data here is synthetic -- confirm before reuse.
    """
    X, y = make_regression(n_samples=100, n_features=1, noise=20)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4)

    model = LinearRegression(n_iterations=100)
    model.fit(X_train, y_train)

    # Training error plot
    n = len(model.training_errors)
    training, = plt.plot(range(n), model.training_errors,
                         label="Training Error")
    plt.legend(handles=[training])
    plt.title("Error Plot")
    plt.ylabel('Mean Squared Error')
    plt.xlabel('Iterations')
    plt.show()

    y_pred = model.predict(X_test)
    mse = mean_squared_error(y_test, y_pred)
    print("Mean squared error: %s" % (mse))

    # Predictions over the full data set, used to draw the fitted line.
    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * X_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * X_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, y_pred_line, color='b', linewidth=2,
             label="Prediction")
    plt.suptitle("Linear Regression")
    plt.title("MSE: %.2f" % mse, fontsize=10)
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
def main():
    """Demo: fit a linear regression on synthetic 1-D data and plot it.

    Generates 100 noisy samples with ``make_regression``, splits them via
    ``DataManipulation().train_test_split`` with ``test_size=0.4``, fits a
    default ``LinearRegression`` on the training part, and then:

    1. plots ``model.errors`` against the iteration index;
    2. plots the training/test points together with the fitted line.

    NOTE(review): the axis labels ('Day', 'Temperature in Celcius') and the
    ``366 *`` scaling of the x-axis look like leftovers from a temperature
    data set, while the data here is synthetic -- confirm before reuse.
    """
    X, y = make_regression(n_samples=100, n_features=1, noise=20)
    x_train, x_test, y_train, y_test = DataManipulation().train_test_split(
        X, y, test_size=0.4)

    model = LinearRegression()
    model.fit(x_train, y_train)

    # Training error plot
    n = len(model.errors)
    plt.plot(range(n), model.errors, label='Training Errors')
    # The curve is given a label, so draw the legend that displays it.
    plt.legend()
    plt.title('Error plot')
    plt.xlabel('Iteration')
    plt.ylabel('Mean Squared Error')
    plt.show()

    # Predictions over the full data set, used to draw the fitted line.
    y_pred_line = model.predict(X)

    # Color map
    cmap = plt.get_cmap('viridis')

    # Plot the results
    m1 = plt.scatter(366 * x_train, y_train, color=cmap(0.9), s=10)
    m2 = plt.scatter(366 * x_test, y_test, color=cmap(0.5), s=10)
    plt.plot(366 * X, y_pred_line, color='black', linewidth=2,
             label="Prediction")
    plt.suptitle("Linear Regression")
    plt.xlabel('Day')
    plt.ylabel('Temperature in Celcius')
    plt.legend((m1, m2), ("Training data", "Test data"), loc='lower right')
    plt.show()
# Rescale the floor-related columns, fit a price regression, and print two
# spot-check predictions next to their reference prices.

# NOTE(review): `max` is bound earlier in the script (outside this excerpt),
# presumably to data['floor'].max() -- confirm. The name also shadows the
# builtin max() for the rest of the script.
data['floor'] = data['floor'].apply(lambda x: x / max)
# Divide top_floor by its own column maximum.
max = data['top_floor'].max()
data['top_floor'] = data['top_floor'].apply(lambda x: x / max)
price = data['price'].values
# Design matrix: a leading column of ones followed by size, room, year,
# floor and top_floor; the transpose puts one sample per row.
X = np.array([np.ones(len(price)), data['size'].values, data['room'].values, data['year'].values, data['floor'].values, data['top_floor'].values]).T
Y = np.array(price)
# NOTE(review): feature_count=5 while X has six columns -- presumably the
# ones column is not counted as a feature; confirm against LinearRegression.
regression = LinearRegression(alpha=0.000001, iteration=300, feature_count=5)
regression.fit(X, Y)
regression.plot()
# Spot checks: each pair prints a hard-coded reference price followed by the
# model's prediction for a hand-written feature vector.
# NOTE(review): floor and top_floor are divided by their maxima above, but
# the last two entries of the query vectors look like raw values (5, 5 and
# 1, 4) -- verify whether predict() inputs need the same scaling.
print("price:", "13800000")
print("price:", int(regression.predict(np.array([1, 45,2,1977,5,5]))))
print()
print("price:", "15333333")
print("price:", int(regression.predict(np.array([1, 40,2,1967,1,4]))))
# Disabled interactive loop, kept from the original:
# while (True):
#     size = int(input("Enter size of house:"))
#     bedroom = int(input("Enter number of bedroom:"))
#     print("price:", int(regression.predict(np.array([1, size, bedroom]))))
# 14700000
"""
@author Victor I. Afolabi
A.I. Engineer & Software developer
[email protected]
Created on 26 August, 2017 @ 9:33 PM.
Copyright (c) 2017. victor. All rights reserved.
"""
import numpy as np

from regression import LinearRegression

# Training data is read from a comma-separated file in the working directory.
data = np.genfromtxt('data.csv', delimiter=',')
num_iter = 1000

# Create a LinearRegression object and fit it for num_iter iterations.
clf = LinearRegression(learning_rate=1e-4)
clf.fit(data=data, num_iter=num_iter)

# Report the fitted m and b attributes of the model.
summary = 'After {:,} iterations. m = {:.2f} and b = {:.2f}'
print(summary.format(num_iter, clf.m, clf.b))
def linear_solve(x_data, y_data):
    """Fit a ``LinearRegression`` and return its predictions on the inputs.

    Args:
        x_data: Inputs passed to ``fit`` and then to ``predict``.
        y_data: Targets passed to ``fit``.

    Returns:
        The result of ``predict(x_data)`` on the freshly fitted model.
    """
    regressor = LinearRegression()
    regressor.fit(x_data, y_data)
    fitted_values = regressor.predict(x_data)
    return fitted_values
def __test_cross_validation_methods():
    """Exercise the cross-validation helpers on a noisy synthetic curve.

    Builds 100 random points of ``2x^2 + exp(-2x)`` plus Gaussian noise,
    fits a plain ``LinearRegression`` on a quadratic design matrix, and
    then runs ``kFoldCrossValidation``, ``kkFoldCrossValidation`` and
    ``MCCrossValidation`` on the same data. For each method the R2, MSE,
    bias and variance are printed together with the difference between
    ``MSE`` and ``bias + var``; all predictions are drawn on one figure.

    Relies on ``np``, ``metrics`` and the three cross-validation classes
    being available at module level.
    """
    # A small implementation of a test case
    from regression import LinearRegression
    import matplotlib.pyplot as plt

    # Initial values
    n = 100
    N_bs = 1000
    k_splits = 4
    test_percent = 0.2
    noise = 0.3

    np.random.seed(1234)

    # Sets up random matrices
    x = np.random.rand(n, 1)

    def func_excact(_x):
        return (2*_x*_x + np.exp(-2*_x)
                + noise * np.random.randn(_x.shape[0], _x.shape[1]))

    y = func_excact(x)

    def design_matrix(_x):
        return np.c_[np.ones(_x.shape), _x, _x * _x]

    def print_cv_summary(cv):
        # Shared report for a finished cross-validation object.
        print("R2: {:-20.16f}".format(cv.R2))
        print("MSE: {:-20.16f}".format(cv.MSE))
        print("Bias^2:{:-20.16f}".format(cv.bias))
        print("Var(y):{:-20.16f}".format(cv.var))
        print("MSE = Bias^2 + Var(y) = ")
        print("{} = {} + {} = {}".format(cv.MSE, cv.bias, cv.var,
                                         cv.bias + cv.var))
        print("Diff: {}".format(abs(cv.bias + cv.var - cv.MSE)))

    # Sets up design matrix
    X = design_matrix(x)

    # Performs regression
    reg = LinearRegression()
    reg.fit(X, y)
    y = y.ravel()
    y_predict = reg.predict(X).ravel()
    r2 = reg.score(y, y_predict)

    print("Regular linear regression")
    print("R2: {:-20.16f}".format(r2))
    print("MSE: {:-20.16f}".format(metrics.mse(y, y_predict)))
    # print (metrics.bias(y, y_predict))
    print("Bias^2:{:-20.16f}".format(metrics.bias2(y, y_predict)))

    # Small plotter
    plt.plot(x, y, "o", label="data")
    plt.plot(x, y_predict, "o",
             label=r"Pred, $R^2={:.4f}$".format(r2))

    # The calls below pass the local k_splits; the previous code referenced
    # `k_fold_size`, which is never defined in this function.
    print("k-fold Cross Validation")
    kfcv = kFoldCrossValidation(x, y, LinearRegression, design_matrix)
    kfcv.cross_validate(k_splits=k_splits, test_percent=test_percent)
    print_cv_summary(kfcv)
    plt.errorbar(kfcv.x_pred_test, kfcv.y_pred,
                 yerr=np.sqrt(kfcv.y_pred_var), fmt="o",
                 label=r"k-fold CV, $R^2={:.4f}$".format(kfcv.R2))

    print("kk Cross Validation")
    kkcv = kkFoldCrossValidation(x, y, LinearRegression, design_matrix)
    kkcv.cross_validate(k_splits=k_splits, test_percent=test_percent)
    print_cv_summary(kkcv)
    plt.errorbar(kkcv.x_pred_test.ravel(), kkcv.y_pred.ravel(),
                 yerr=np.sqrt(kkcv.y_pred_var.ravel()), fmt="o",
                 label=r"kk-fold CV, $R^2={:.4f}$".format(kkcv.R2))

    print("Monte Carlo Cross Validation")
    mccv = MCCrossValidation(x, y, LinearRegression, design_matrix)
    mccv.cross_validate(N_bs, k_splits=k_splits, test_percent=test_percent)
    print_cv_summary(mccv)

    print("\nCross Validation methods tested.")

    plt.errorbar(mccv.x_pred_test, mccv.y_pred,
                 yerr=np.sqrt(mccv.y_pred_var), fmt="o",
                 label=r"MC CV, $R^2={:.4f}$".format(mccv.R2))

    plt.xlabel(r"$x$")
    plt.ylabel(r"$y$")
    plt.title(r"$y=2x^2$")
    plt.legend()
    plt.show()
def __test_bootstrap_fit():
    """Exercise ``BootstrapRegression`` on a noisy synthetic curve.

    Builds 200 random points of ``2x^2 + exp(-2x)`` plus Gaussian noise,
    fits a plain ``LinearRegression`` on a quadratic design matrix, then
    bootstraps the same model 1000 times with ``test_percent=0.35``.
    Scores and coefficients of both fits are printed and the predictions
    are drawn on a single figure.

    Relies on ``np``, ``metrics`` and ``BootstrapRegression`` being
    available at module level.
    """
    # A small implementation of a test case
    from regression import LinearRegression

    # Initial values
    N_bs = 1000
    n = 200
    noise = 0.2
    np.random.seed(1234)
    test_percent = 0.35

    # Sets up random matrices
    x = np.random.rand(n, 1)

    def noisy_target(_x):
        n_rows, n_cols = _x.shape[0], _x.shape[1]
        return (2*_x*_x + np.exp(-2*_x)
                + noise * np.random.randn(n_rows, n_cols))

    y = noisy_target(x)

    def design_matrix(_x):
        return np.c_[np.ones(_x.shape), _x, _x*_x]

    # Sets up design matrix
    X = design_matrix(x)

    # Performs regression
    reg = LinearRegression()
    reg.fit(X, y)
    y = y.ravel()
    y_predict = reg.predict(X).ravel()

    # NOTE(review): score() receives (y_predict, y) here -- confirm this
    # matches the argument order LinearRegression.score expects.
    print("Regular linear regression")
    print("R2: {:-20.16f}".format(reg.score(y_predict, y)))
    print("MSE: {:-20.16f}".format(metrics.mse(y, y_predict)))
    print("Beta: ", reg.coef_.ravel())
    print("var(Beta): ", reg.coef_var.ravel())
    print("")

    # Performs a bootstrap
    print("Bootstrapping")
    bs_reg = BootstrapRegression(x, y, LinearRegression, design_matrix)
    bs_reg.bootstrap(N_bs, test_percent=test_percent)

    print("R2: {:-20.16f}".format(bs_reg.R2))
    print("MSE: {:-20.16f}".format(bs_reg.MSE))
    print("Bias^2:{:-20.16f}".format(bs_reg.bias))
    print("Var(y):{:-20.16f}".format(bs_reg.var))
    print("Beta: ", bs_reg.coef_.ravel())
    print("var(Beta): ", bs_reg.coef_var.ravel())
    print("MSE = Bias^2 + Var(y) = ")
    print("{} = {} + {} = {}".format(
        bs_reg.MSE, bs_reg.bias, bs_reg.var, bs_reg.bias + bs_reg.var))
    print("Diff: {}".format(abs(bs_reg.bias + bs_reg.var - bs_reg.MSE)))

    import matplotlib.pyplot as plt

    # Raw data and the plain regression fit.
    plt.plot(x.ravel(), y, "o", label="Data")
    plain_fit_label = r"Pred, R^2={:.4f}".format(reg.score(y_predict, y))
    plt.plot(x.ravel(), y_predict, "o", label=plain_fit_label)

    print(bs_reg.y_pred.shape, bs_reg.y_pred_var.shape)

    # Bootstrap predictions with error bars of sqrt(y_pred_var).
    bootstrap_label = r"Bootstrap Prediction, $R^2={:.4f}$".format(bs_reg.R2)
    plt.errorbar(bs_reg.x_pred_test, bs_reg.y_pred,
                 yerr=np.sqrt(bs_reg.y_pred_var), fmt="o",
                 label=bootstrap_label)

    plt.xlabel(r"$x$")
    plt.ylabel(r"$y$")
    plt.title(r"$2x^2 + \sigma^2$")
    plt.legend()
    plt.show()
import numpy as np
import pandas as pd

from regression import LinearRegression

# Header-less CSV: column 0 is used as the label, columns 1-2 as features.
df = pd.read_csv(
    "/Users/yliang/data/trunk1/spark/assembly/target/tmp/LinearRegressionSuite/datasetWithDenseFeature2/part-00000",
    header=None,
)

feature_columns = df.columns[1:3]
label_column = df.columns[0]
X = np.array(df[feature_columns])
y = np.array(df[label_column])

# Regression with an intercept and per-entry lower/upper bounds.
lir = LinearRegression(
    fit_intercept=True,
    alpha=2.3,
    max_iter=100,
    tol=1e-06,
    standardization=False,
    lower_bound=[-np.inf, 6.0, -np.inf],
    upper_bound=[0.0, 10.0, np.inf],
)
lir.fit(X, y)

print("coefficients = " + str(lir.coef_))
print("intercept = " + str(lir.intercept_))
def __get_full_model_r2(self):
    """Score a linear model fitted on every column of ``self.X``.

    Returns:
        The value of ``calculate_r2(self.y, predictions)``, where
        ``predictions`` holds one ``predict`` result per row of ``self.X``.
    """
    full_model = LinearRegression(gradient_descent=False)
    full_model.fit(self.X, self.y)

    predictions = []
    for row in self.X:
        predictions.append(full_model.predict(row))

    return calculate_r2(self.y, predictions)