# +:+ +:+ +:+ #
# By: ecross <*****@*****.**> +#+ +:+ +#+ #
# +#+#+#+#+#+ +#+ #
# Created: 2020/05/04 12:33:11 by ecross #+# #+# #
# Updated: 2020/05/04 12:46:15 by ecross ### ########.fr #
# #
# **************************************************************************** #

import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_regression
from my_linear_regression import MyLinearRegression as MLR

if __name__ == "__main__":
    # Generate a 100-sample, single-feature regression data set.
    x, y = make_regression(n_samples=100, n_features=1, noise=10)
    theta = np.array([1, 1])

    # Previous configuration, kept for reference:
    # mlr = MLR(theta, 0.001, 500)
    # theta1 = mlr.fit_(x, y)
    # plt.plot(x, y, 'o')
    # plt.plot(x, (theta1[1] * x + theta1[0]), '-r')

    mlr = MLR(theta, 0.5, 1100)
    theta1 = mlr.fit_(x, y)

    # Scatter the samples, then overlay the line described by the value
    # returned from fit_ (index 0 used as offset, index 1 as slope).
    plt.plot(x, y, 'o')
    fitted_line = theta1[1] * x + theta1[0]
    plt.plot(x, fitted_line, '-g')
    plt.show()
import pandas as pd import numpy as np from sklearn.metrics import mean_squared_error from my_linear_regression import MyLinearRegression as MyLR import matplotlib.pyplot as plt data = pd.read_csv("are_blue_pills_magics.csv") Xpill = np.array(data["Micrograms"]).reshape(-1, 1) Yscore = np.array(data["Score"]).reshape(-1, 1) linear_model1 = MyLR(np.array([[89.0], [-8]])) linear_model2 = MyLR(np.array([[89.0], [-6]])) Y_model1 = linear_model1.predict(Xpill) Y_model2 = linear_model2.predict(Xpill) # print("Me: ", linear_model1.mse_(Yscore, Y_model1)) # print("Sc: ", mean_squared_error(Yscore, Y_model1)) # print() # # print("Me: ", linear_model2.mse_(Yscore, Y_model2)) # print("Sc: ", mean_squared_error(Yscore, Y_model2)) def plot(x, y, theta): """Plot the data and prediction line from three non-empty numpy.ndarray. Args: x: has to be an numpy.ndarray, a vector of dimension m * 1. y: has to be an numpy.ndarray, a vector of dimension m * 1. theta: has to be an numpy.ndarray, a vector of dimension 2 * 1. Returns: Nothing. Raises:
OPT_N_FOLD = 3  # used for hyper-param searching
SEED = 4434
mse = make_scorer(mean_squared_error)


def rmse_cv(*args, **kwargs):
    """Return the mean over folds of sqrt(MSE) from ``cross_val_score``.

    All positional and keyword arguments are forwarded to
    ``cross_val_score``. The ``scoring`` keyword is always forced to the
    module-level ``mse`` scorer, because the square root taken here is only
    meaningful for a mean-squared-error score.

    Previously the keyword arguments were accepted but silently discarded,
    so e.g. ``cv=...`` passed by keyword had no effect.
    """
    kwargs["scoring"] = mse
    return np.mean(np.sqrt(cross_val_score(*args, **kwargs)))


# %%baseline
kf = KFold(N_FOLD)
lr_rmse = []
for train_index, test_index in kf.split(X_train):
    lr_X_train, lr_X_test = X_train.iloc[train_index], X_train.iloc[test_index]
    lr_y_train, lr_y_test = y_train.iloc[train_index], y_train.iloc[test_index]
    lr = MyLinearRegression()
    lr.fit(lr_X_train, lr_y_train)
    # Per-fold RMSE on the held-out part of the split.
    lr_rmse.append(np.sqrt(mean_squared_error(
        lr.predict(lr_X_test), lr_y_test)))
print("LR 5fold RMSE ", np.mean(lr_rmse))

# n = 2 expansion
lr_rmse = []
for train_index, test_index in kf.split(X_train):
    lr_X_train, lr_X_test = X_train.iloc[train_index], X_train.iloc[test_index]
    lr_y_train, lr_y_test = y_train.iloc[train_index], y_train.iloc[test_index]
    lr = MyLinearRegression(poly_degree=2)
    lr.fit(lr_X_train, lr_y_train)
    lr_rmse.append(np.sqrt(mean_squared_error(
        lr.predict(lr_X_test), lr_y_test)))
import numpy as np
from my_linear_regression import MyLinearRegression as MyLR

# Five samples, each stored as a single-column row.
x = np.array([12.4956442, 21.5007972, 31.5527382,
              48.9145838, 57.5088733]).reshape(-1, 1)
y = np.array([37.4013816, 36.1473236, 45.7655287,
              46.6793434, 59.5585554]).reshape(-1, 1)

lr1 = MyLR([2, 0.7])

# Example 0.0:
print(lr1.predict_(x))
# Output:
# array([ [10.74695094],
#         [17.05055804],
#         [24.08691674],
#         [36.24020866],
#         [42.25621131]])

# Example 0.1:
cost_elems = lr1.cost_elem_(lr1.predict_(x), y)
print(cost_elems)
# Output:
# array([ [77.72116511],
#         [49.33699664],
#         [72.38621816],
#         [37.29223426],
#         [78.28360514]])

# Example 0.2:
print(lr1.cost_(lr1.predict_(x), y))
from my_linear_regression import MyLinearRegression as MyLR
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

data = pd.read_csv("are_blue_pills_magics.csv")

# Each column is copied into its own (m, 1) array.
Xpill, Yscore = (
    np.array(data[column]).reshape(-1, 1)
    for column in ('Micrograms', 'Score')
)

# Two models that share the first parameter and differ in the second.
linear_model1, linear_model2 = (
    MyLR(np.array([89.0, second])) for second in (-8, -6)
)

linear_model1.fit_ulti(Xpill, Yscore)
#Y_model1 = linear_model1.predict_(Xpill)
#Y_model2 = linear_model2.predict_(Xpill)
#print(Y_model1)
#print(Y_model2)
#print(linear_model1.cost_(Xpill, Yscore))
theta = linear_model1.fit_(Xpill, Yscore)
#print(theta)
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from my_linear_regression import MyLinearRegression as MyLR

data = pd.read_csv("../resources/are_blue_pills_magics.csv")
# print(data)

# Column vectors for the dose and the score.
Xpill = np.array(data["Micrograms"]).reshape(-1, 1)
Yscore = np.array(data["Score"]).reshape(-1, 1)

# linear_model1 = MyLR(np.array([[89.0], [-8]]))
initial_thetas = np.array([[89.0], [-6]])
linear_model2 = MyLR(initial_thetas)

# linear_model2.plot_costs(Xpill, Yscore)
linear_model2.plot_best_h(Xpill, Yscore)

# Y_model1 = linear_model1.predict_(Xpill)
# Y_model2 = linear_model2.predict_(Xpill)
# print(linear_model1.cost_(Yscore, Y_model1))      # 57.60304285714282
# print(mean_squared_error(Yscore, Y_model1))       # 57.603042857142825
# print(linear_model2.cost_(Yscore, Y_model2))      # 232.16344285714285
# print(mean_squared_error(Yscore, Y_model2))       # 232.16344285714285
import numpy as np
from my_linear_regression import MyLinearRegression as MyLR

if __name__ == "__main__":
    X = np.array([[1., 1., 2., 3.],
                  [5., 8., 13., 21.],
                  [34., 55., 89., 144.]])
    Y = np.array([[23.], [48.], [218.]])
    mylr = MyLR([[1.], [1.], [1.], [1.], [1]])

    # Each entry: heading, method to call, its arguments, and the reference
    # output that is printed after the actual result.
    checks = (
        ("# Example 0:", mylr.predict, (X,),
         "array([[8.], [48.], [323.]])"),
        ("# Example 1:", mylr.cost_elem_, (X, Y),
         "array([[37.5], [0.], [1837.5]])"),
        ("# Example 2:", mylr.cost_, (X, Y), 1875.0),
    )
    for heading, method, arguments, expected in checks:
        print(heading)
        print(method(*arguments))
        print("# Output:")
        print(expected)
        print()

    # sys.lol()
    print("# Example 3:")
    mylr.fit_(X, Y)
    print(mylr.theta)
    print("# Output:")
def mse_(self, y, y_hat):
    """Delegate to ``MLR.mse_``, passing this instance as ``self``.

    Args:
        y: forwarded unchanged as the first argument of ``MLR.mse_``.
        y_hat: forwarded unchanged as the second argument of ``MLR.mse_``.

    Returns:
        Whatever ``MLR.mse_`` returns for these arguments.
    """
    delegate = MLR.mse_
    return delegate(self, y, y_hat)
def test_MyLinearRegressing():
    """Print predictions and costs of two ``MyLR`` models for manual checking.

    The first model is used with its initial parameters; the second one is
    fitted on the same data before being evaluated. Reference outputs are
    given in the comments after each step.
    """
    def show(value):
        # Every result is followed by one blank line.
        print(value, end="\n\n")

    features = np.array([[12.4956442], [21.5007972], [31.5527382],
                         [48.9145838], [57.5088733]])
    targets = np.array([[37.4013816], [36.1473236], [45.7655287],
                        [46.6793434], [59.5585554]])

    untrained = MyLR([2, 0.7])

    # Example 0.0:
    show(untrained.predict_(features))
    # Output:
    # array([[10.74695094],
    #        [17.05055804],
    #        [24.08691674],
    #        [36.24020866],
    #        [42.25621131]])

    # Example 0.1:
    show(untrained.cost_elem_(untrained.predict_(features), targets))
    # Output:
    # array([[77.72116511],
    #        [49.33699664],
    #        [72.38621816],
    #        [37.29223426],
    #        [78.28360514]])

    # Example 0.2:
    show(untrained.cost_(untrained.predict_(features), targets))
    # Output:
    # 315.0202193084312

    # Example 1.0:
    # lr2 = MyLR([0, 0])
    trained = MyLR([1, 1], 5e-8, 1500000)
    trained.fit_(features, targets)
    show(trained.thetas)
    # Output:
    # array([[1.40709365],
    #        [1.1150909]])

    # Example 1.1:
    show(trained.predict_(features))
    # Output:
    # array([[15.3408728],
    #        [25.38243697],
    #        [36.59126492],
    #        [55.95130097],
    #        [65.53471499]])

    # Example 1.2:
    show(trained.cost_elem_(trained.predict_(features), targets))
    # Output:
    # array([[35.6749755],
    #        [4.14286023],
    #        [1.26440585],
    #        [29.30443042],
    #        [22.27765992]])

    # Example 1.3:
    show(trained.cost_(trained.predict_(features), targets))
for i in range(3): plt.plot(x_train[:, i:i + 1], y_train, 'go') plt.title(data.columns[i]) plt.plot(x_test[:, i:i + 1], y_test, 'ro', markersize=3) plt.show() #initialise thetas as array with feature number + 1 zeros thetas = np.zeros(new_features.shape[1] + 1) #should be able to use same alpha and cycle number for all, as same data #carry out linear regression on training data cost_list = [] mlr = MLR(thetas, alpha=0.1, n_cycle=400) mlr.fit_(x_train, y_train) y_hat = mlr.predict_(x_test)[1] cost_list.append(mlr.mse_(y_test, y_hat)) plot(x_test, y_test, y_hat, features) #carry out 9 ridge regressions on training data, with lambda from 0.1 to 0.9 mrg = MRG(thetas, alpha=0.1, n_cycle=400) for i in range(1, 10): mrg.lambda_ = i / 10 mrg.thetas = thetas plt.title('lambda = ' + str(i / 10)) mrg.fit_(x_train, y_train) y_hat = mrg.predict_(x_test)[1] cost_list.append(mlr.mse_(y_test, y_hat))
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from my_linear_regression import MyLinearRegression as MyLR

data = pd.read_csv("../resources/are_blue_pills_magics.csv")
Xpill = np.array(data["Micrograms"]).reshape(-1, 1)
Yscore = np.array(data["Score"]).reshape(-1, 1)
for column_vector in (Xpill, Yscore):
    print(column_vector)

linear_model1 = MyLR(np.array([[89.0], [-8]]))
linear_model2 = MyLR(np.array([[89.0], [-6]]))
# linear_model1.plot(Xpill, Yscore)
# linear_model2.plot(Xpill, Yscore)

Y_model1 = linear_model1.predict_(Xpill)
Y_model2 = linear_model2.predict_(Xpill)

# For each model, print the class's own MSE next to scikit-learn's.
# Reference values:
#   model 1 -> 57.60304285714282  (mine), 57.603042857142825 (not mine)
#   model 2 -> 232.16344285714285 (mine)
for model, y_hat in ((linear_model1, Y_model1), (linear_model2, Y_model2)):
    print("mine: ", model.mse_(Yscore, y_hat))
    print("not mine: ", mean_squared_error(Yscore, y_hat))
import pandas as pd
# `sklearn.cross_validation` was removed from scikit-learn;
# `train_test_split` is provided by `sklearn.model_selection`.
from sklearn.model_selection import train_test_split

# Importing the dataset
dataset = pd.read_csv('../datasets/salary_data.csv')
X = dataset.iloc[:, :-1].values  # every column except the last
y = dataset.iloc[:, 1].values    # second column

# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=1 / 3, random_state=0)

# Fitting Simple Linear Regression to the Training set.
# The model is trained on the training split only: fitting on the full
# (X, y) would let the test rows used below influence the parameters.
regressor = MyLinearRegression()
regressor.train(X_train, y_train)
print(regressor.weight)
print(regressor.bias)

# Predicting the Test set results
y_pred = regressor.predict(X_test)

# Visualising the Training set results
plt.scatter(X_train, y_train, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (Training set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()
import pandas as pd import numpy as np from sklearn.metrics import mean_squared_error from my_linear_regression import MyLinearRegression as MyLR import matplotlib.pyplot as plt from polynomial_model import add_polynomial_features data = pd.read_csv("are_blue_pills_magics.csv") Xpill = np.array(data["Micrograms"]).reshape(-1, 1) Yscore = np.array(data["Score"]).reshape(-1, 1) linear_model1 = MyLR(np.array([[89.0], [-8]])) Y_model1 = linear_model1.predict(Xpill) def continuous_plot(x, y, i, lr): # Build the model: # Plot: ## To get a smooth curve, we need a lot of data points continuous_x = np.arange(1, 7.01, 0.01).reshape(-1, 1) x_ = add_polynomial_features(continuous_x, i) y_hat = lr.predict(x_) print(x.shape, y.shape) plt.scatter(x.T[0], y) plt.plot(continuous_x, y_hat, color='orange') plt.show() cost = [] x = add_polynomial_features(Xpill, 10) big_theta = [[2.03333758e-06], [4.76503382e-06], [1.29939248e-05],
Y_model1 = lm.predict_(x) plt.plot(x, Y_model1, '--r', color='g') plt.scatter(x, y, color='b', s=30, label='Strue(pills)') plt.scatter(x, Y_model1, color='g', s=20, label='Spredict(pills)') plt.xlabel('Quantity of blue pill (in micrograms)') plt.ylabel('Space driving score') plt.grid(True) plt.legend(loc='upper left') plt.show() def plot_cost_(lm): plt.scatter(lm.thetas[0], lm.thetas[1], color='g', s=20) plt.ylabel('Cost') plt.grid(True) plt.show() data = pd.read_csv("are_blue_pills_magics.csv") Xpill = np.array(data['Micrograms']).reshape(-1, 1) Yscore = np.array(data['Score']).reshape(-1, 1) linear_model1 = MyLR(np.array([[89.0], [-8]])) #plot_(linear_model1, Xpill, Yscore) #plot_cost_(linear_model1) linear_model1.fit_(Xpill, Yscore) print(linear_model1.thetas) #plot_cost_(linear_model1) plot_(linear_model1, Xpill, Yscore)
from sklearn.metrics import mean_squared_error
from my_linear_regression import MyLinearRegression as MyLR


def print_costfn(t0, y):
    """Plot one green square per candidate value of the second parameter.

    3000 evenly spaced values between ``t0 - 10`` and ``t0 + 50`` are tried.
    For each one a model with parameters ``[[-10], [value]]`` predicts on the
    module-level ``Xpill`` and the resulting cost against ``y`` is plotted
    versus ``thetas[1]`` of that model.

    Args:
        t0: centre-ish value defining the sweep range.
        y: target values passed to ``cost_`` as its first argument.
    """
    for candidate in np.linspace(t0 - 10, t0 + 50, 3000):
        model = MyLR(np.array([[-10], [candidate]]))
        predicted = model.predict_(Xpill)
        plt.plot(model.thetas[1], model.cost_(y, predicted), 'gs')


data = pd.read_csv("are_blue_pills_magics.csv")
Xpill = np.array(data["Micrograms"]).reshape(-1, 1)
Yscore = np.array(data["Score"]).reshape(-1, 1)

linear_model1 = MyLR(np.array([[89.0], [-8]]))
linear_model2 = MyLR(np.array([[89.0], [-6]]))

# These predictions are taken before fit_ is called on linear_model1.
Y_model1 = linear_model1.predict_(Xpill)
Y_model2 = linear_model2.predict_(Xpill)

# A third model is built from the value returned by fit_.
fitted_thetas = linear_model1.fit_(Xpill, Yscore)
linear_model1_2 = MyLR(fitted_thetas)
Y_model1_2 = linear_model1_2.predict_(Xpill)

# Twice the class's cost is printed next to scikit-learn's MSE.
for predictions in (Y_model1, Y_model2):
    print(linear_model1.cost_(Yscore, predictions) * 2)
    print(mean_squared_error(Yscore, predictions))

plt.plot(Xpill, Y_model1_2, 'gs')
plt.plot(Xpill, Y_model1_2, 'g--', label="Spredict(pills)")
plt.plot(Xpill, Yscore, 'bo', label="Strue")
def gradient(self, x, y):
    """Delegate to ``MLR.gradient``, passing this instance as ``self``.

    Args:
        x: forwarded unchanged as the first argument of ``MLR.gradient``.
        y: forwarded unchanged as the second argument of ``MLR.gradient``.

    Returns:
        Whatever ``MLR.gradient`` returns for these arguments.
    """
    delegate = MLR.gradient
    return delegate(self, x, y)
def print_costfn(t0, y):
    """Plot one green square per candidate value of the second parameter.

    3000 evenly spaced values between ``t0 - 10`` and ``t0 + 50`` are tried.
    For each one a model with parameters ``[[-10], [value]]`` predicts on the
    module-level ``Xpill`` and the resulting cost against ``y`` is plotted
    versus ``thetas[1]`` of that model.

    Args:
        t0: value defining the sweep range.
        y: target values passed to ``cost_`` as its first argument.
    """
    sweep = np.linspace(t0 - 10, t0 + 50, 3000)
    for candidate in sweep:
        model = MyLR(np.array([[-10], [candidate]]))
        predicted = model.predict_(Xpill)
        plt.plot(model.thetas[1], model.cost_(y, predicted), 'gs')
thetas = np.array([1, 1])
plt.ylabel("price")

# Single-feature runs, currently disabled:
#mlr_age = MLR(thetas, alpha=0.01, n_cycle=4000)
#plot(mlr_age, age, y, "age")
#mlr_thrust = MLR(thetas, alpha=0.00001, n_cycle=30)
#plot(mlr_thrust, tp, y, "thrust power")
#mlr_tm = MLR(thetas, alpha=0.00022, n_cycle=76000)
#plot(mlr_tm, tm, y, "terameters")

thetas = np.array([1, 1, 1, 1])
mlr_multi = MLR(thetas, alpha=0.00009, n_cycle=100)
#mlr_multi.plot_cost_change(features, y)
th = mlr_multi.fit_(features, y)
print(th)

# Build the prediction by hand: th[0] plus th[i] times the (i-1)-th feature
# column (kept two-dimensional by the slice) for i = 1..3.
y_hat = th[0]
for i in range(1, 4):
    y_hat += th[i] * features[:, i - 1:i]

plt.plot(age, y, "ob")
plt.xlabel("age")
plt.plot(age, y_hat, "o", markersize=2)
plt.show()

plt.plot(tp, y, "ob")
plt.xlabel("thrust power")
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error
from my_linear_regression import MyLinearRegression as MyLR

if __name__ == '__main__':
    data = pd.read_csv("are_blue_pills_magics.csv")
    Xpill = np.array(data["Micrograms"]).reshape(-1, 1)
    Yscore = np.array(data["Score"]).reshape(-1, 1)

    linear_model1 = MyLR(np.array([[89.0], [-8]]))
    linear_model2 = MyLR(np.array([[89.0], [-6]]))
    Y_model1 = linear_model1.predict_(Xpill)
    Y_model2 = linear_model2.predict_(Xpill)

    # Three (x, y, format) series handed to the model's plot method:
    # the raw data and the predictions of both models.
    series = (
        Xpill, Yscore, "o",
        Xpill, Y_model1, "x--",
        Xpill, Y_model2, "b",
    )
    linear_model1.plot(*series)

    # For each model, print its own mse_ and then scikit-learn's MSE of its
    # predictions.
    for model, predictions in ((linear_model1, Y_model1),
                               (linear_model2, Y_model2)):
        print(model.mse_(Xpill, Yscore))
        print(mean_squared_error(Yscore, predictions))
import numpy as np
from my_linear_regression import MyLinearRegression as MyLR

x = np.array([12.4956442, 21.5007972, 31.5527382, 48.9145838, 57.5088733])
y = np.array([37.4013816, 36.1473236, 45.7655287, 46.6793434, 59.5585554])

lr1 = MyLR([2, 0.7])

# Each example is a heading plus a deferred computation, so the heading is
# printed before the corresponding model call is made.
examples = (
    # Example 0.0:
    # Output:
    # array([[10.74695094],
    #        [17.05055804],
    #        [24.08691674],
    #        [36.24020866],
    #        [42.25621131]])
    ("Example 0.0", lambda: lr1.predict_(x)),
    # Example 0.1:
    # Output:
    # array([[77.72116511],
    #        [49.33699664],
    #        [72.38621816],
    #        [37.29223426],
    #        [78.28360514]])
    ("\nExample 0.1", lambda: lr1.cost_elem_(lr1.predict_(x), y)),
    # Example 0.2:
    ("\nExample 0.2", lambda: lr1.cost_(lr1.predict_(x), y)),
)
for heading, compute in examples:
    print(heading)
    print(compute())
# Target: last column of the frame, as a single-column array.
Y = np.array(df.iloc[:, -1]).reshape(-1, 1)
pkl = DataHandler(ARGS)
if ARGS.load:
    # Reuse the stored preprocessing objects and parameters, re-applying
    # the stored scaling to the current data where a scaler was used.
    PreP_x, PreP_y, theta = pkl.load()
    if PreP_x.scaler:
        X = PreP_x.re_apply_minmax(X)
    if PreP_y.scaler:
        Y = PreP_y.re_apply_minmax(Y)
    # Identity check against None instead of comparing types.
    if X is None or Y is None:
        sys.exit()
else:
    # Fresh run: preprocess both arrays and start with all parameters at 1
    # (one per column of X, plus one).
    PreP_x = Preprocessing(X, scaler=ARGS.scaler)
    PreP_y = Preprocessing(Y, scaler=ARGS.scaler)
    X = PreP_x.data
    Y = PreP_y.data
    theta = [1] * (X.shape[1] + 1)
lr = MyLinearRegression(theta, alpha=ARGS.alpha, n_cycle=ARGS.n_cycle,
                        visual=ARGS.visual)
err = lr.fit(X, Y)
# A None result from fit ends the program before anything is saved.
if err is None:
    sys.exit()
pkl.save(PreP_x, PreP_y, lr.theta)
if ARGS.visual:
    lr.plot_results(X, Y)