def plot_cost(x, y):
    """Plot the cost against θ[1] for a handful of fixed θ[0] values.

    For every θ[0] in ``np.arange(74, 104, 5.0)`` a curve is drawn showing
    the value returned by ``MyLR.cost_`` while θ[1] sweeps
    ``np.arange(-108, 92, 0.1)``. The view is clipped to
    x ∈ (-13, -4.5) and y ∈ (10, 50), and the figure is shown at the end.

    Args:
        x: feature values, handed unchanged to ``MyLR.predict``
           (expected to be a numpy.ndarray of dimension m * 1).
        y: target values, handed unchanged to ``MyLR.cost_``
           (expected to be a numpy.ndarray of dimension m * 1).

    Returns:
        Nothing.
    """
    plt.ylim((10, 50))
    plt.xlim((-13, -4.5))

    half_span = 15
    step = half_span * 2 / 6

    # The θ[1] sweep is the same for every θ[0]; build it once.
    slopes = np.arange(-8 - 100, -8 + 100, 0.1)

    for t0 in np.arange(89 - half_span, 89 + half_span, step):
        costs = []
        for t1 in slopes:
            # No fitting happens here: the model is only used to evaluate
            # the cost at the given (θ[0], θ[1]) pair.
            model = MyLR(thetas=[t0, t1], alpha=1e-3, max_iter=50000)
            predicted = model.predict(x)
            costs.append(model.cost_(y, predicted))

        # θ[0] truncated to one decimal for the legend entry.
        label = f"θ[0]={int(t0 * 10) / 10}"
        print(label, "done!")
        plt.plot(list(slopes), costs, label=label)

    plt.xlabel("θ[1]")
    plt.ylabel("MSE(θ[0], θ[1])")
    plt.legend(loc='upper left')
    plt.show()
def solve_linear_regression(self):
    """Fit, report and plot a linear model, then compare with sklearn.

    Steps, in order:
      1. For 1- or 2-feature problems, plot the train/test split.
      2. Fit ``MyLinearRegression`` on the training data and print its
         formula and the error on the test inputs
         (via ``self.mean_square_error``).
      3. For 1- or 2-feature problems, plot the model and test results.
      4. Fit ``LinearRegression`` on the same data and print the same
         two reports for comparison.

    Reads ``self.train_inputs``, ``self.train_outputs``,
    ``self.test_inputs``, ``self.test_outputs`` and
    ``self.input_features``. Returns nothing.
    """

    def describe(intercept, coefficients):
        # Render "f(x) = b0 + b1*x1 + b2*x2 + ..." for printing.
        head = "f(x) = " + str(intercept)
        tail = "".join(
            " + " + str(coefficient) + "*x" + str(position)
            for position, coefficient in enumerate(coefficients, start=1)
        )
        return head + tail

    feature_count = len(self.train_inputs[0])

    # Data split plots exist only for 1 or 2 features.
    if feature_count == 1:
        plot_data_split_simple(
            self.train_inputs,
            self.train_outputs,
            self.test_inputs,
            self.test_outputs,
            [self.input_features[0], "Happiness"],
        )
    elif feature_count == 2:
        plot_data_split_multiple(
            self.train_inputs,
            self.train_outputs,
            self.test_inputs,
            self.test_outputs,
            [self.input_features[0], self.input_features[1], "Happiness"],
        )

    # Own implementation: fit and report.
    regression = MyLinearRegression()
    regression.fit(self.train_inputs, self.train_outputs)
    weights = regression.b
    print("model: " + describe(regression.intercept, weights))

    computed_test_results = regression.predict(self.test_inputs)
    own_error = self.mean_square_error(computed_test_results)
    print("prediction error: " + str(own_error))

    # Model / test-result plots exist only for 1 or 2 features.
    if feature_count == 1:
        plot_model_simple(
            self.train_inputs,
            self.train_outputs,
            weights[0],
            regression.intercept,
            [self.input_features[0], "Happiness"],
        )
        plot_test_results_simple(
            self.test_inputs,
            self.test_outputs,
            computed_test_results,
            [self.input_features[0], "Happiness"],
        )
    elif feature_count == 2:
        # NOTE(review): coefficients are passed as (b[1], b[0]); presumably
        # this matches plot_model_multiple's parameter order - confirm.
        plot_model_multiple(
            self.train_inputs,
            self.train_outputs,
            weights[1],
            weights[0],
            regression.intercept,
            [self.input_features[0], self.input_features[1], "Happiness"],
        )
        plot_test_results_multiple(
            self.test_inputs,
            self.test_outputs,
            computed_test_results,
            [self.input_features[0], self.input_features[1], "Happiness"],
        )

    # Reference implementation: same fit and the same two reports.
    regression_sk = LinearRegression()
    regression_sk.fit(self.train_inputs, self.train_outputs)
    print("model sk: " + describe(regression_sk.intercept_, regression_sk.coef_))

    computed_test_results_sk = regression_sk.predict(self.test_inputs)
    sk_error = self.mean_square_error(computed_test_results_sk)
    print("prediction error sk: " + str(sk_error))
if (df.shape[1] > 1): X = np.array(df.iloc[:, 0:-1]).reshape(-1, len(df.columns) - 1) Y = np.array(df.iloc[:, -1]).reshape(-1,1) else: X = np.array(df.iloc[:, :]) print("Dataset without results, if visual asked, an array of zeros will be used") Y = np.zeros_like(X) else: X = ARGS["values"] Y = np.zeros_like(X) if ARGS.load: pkl = DataHandler(ARGS) PreP_x, PreP_y, theta = pkl.load() X = PreP_x.re_apply_minmax(X) Y = PreP_y.re_apply_minmax(Y) if type(X) == type(None): sys.exit() else: theta = [0] * (X.shape[1] + 1) print("Theta is: ", theta) lr = MyLinearRegression(theta, visual=ARGS.visual) value = lr.predict(X) print("Predicted value(s):\n", value) if ARGS.load: print("\twithout preprocessing:\n", PreP_y.unapply_minmax(value)) if ARGS.visual: lr.plot_results(X, Y)
# Feature matrix: everything except the target and the user identifier.
X_train = train.drop(["rating_x", "user_id"], axis=1)

# %%model evaluation
kf = KFold(5)
# Alternative estimator kept for reference:
# lr = Lasso()  # sklearn implementation as reference benchmark

# Per-fold RMSE of the linear-regression baseline.
lr_rmse = []
for train_index, test_index in kf.split(X_train):
    lr_X_train = X_train.iloc[train_index]
    lr_X_test = X_train.iloc[test_index]
    lr_y_train = y_train.iloc[train_index]
    lr_y_test = y_train.iloc[test_index]

    lr = MyLinearRegression()
    # lr = MyLinearRegression(poly_degree=2)
    lr.fit(lr_X_train, lr_y_train)

    fold_mse = mean_squared_error(lr.predict(lr_X_test), lr_y_test)
    lr_rmse.append(np.sqrt(fold_mse))
print("LR, 5fold RMSE ", np.mean(lr_rmse))

# %%mlp
# Two sigmoid hidden layers of 400 units on a 48-wide input, with batch
# normalisation and dropout, ending in a single linear output unit.
mlp = tf.keras.models.Sequential([
    tf.keras.layers.Input([48]),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(400, activation="sigmoid"),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dropout(0.4),
    tf.keras.layers.Dense(400, activation="sigmoid"),
    tf.keras.layers.Dense(1),
])
adam = tf.keras.optimizers.Adam(learning_rate=1e-4)
scheduler = tf.keras.callbacks.ReduceLROnPlateau()
import pandas as pd import numpy as np from sklearn.metrics import mean_squared_error from my_linear_regression import MyLinearRegression as MyLR import matplotlib.pyplot as plt data = pd.read_csv("are_blue_pills_magics.csv") Xpill = np.array(data["Micrograms"]).reshape(-1, 1) Yscore = np.array(data["Score"]).reshape(-1, 1) linear_model1 = MyLR(np.array([[89.0], [-8]])) linear_model2 = MyLR(np.array([[89.0], [-6]])) Y_model1 = linear_model1.predict(Xpill) Y_model2 = linear_model2.predict(Xpill) # print("Me: ", linear_model1.mse_(Yscore, Y_model1)) # print("Sc: ", mean_squared_error(Yscore, Y_model1)) # print() # # print("Me: ", linear_model2.mse_(Yscore, Y_model2)) # print("Sc: ", mean_squared_error(Yscore, Y_model2)) def plot(x, y, theta): """Plot the data and prediction line from three non-empty numpy.ndarray. Args: x: has to be an numpy.ndarray, a vector of dimension m * 1. y: has to be an numpy.ndarray, a vector of dimension m * 1. theta: has to be an numpy.ndarray, a vector of dimension 2 * 1. Returns: Nothing. Raises:
def rmse_cv(*args, **kwargs):
    """Return the mean over folds of sqrt(cross-validated score).

    All positional and keyword arguments are forwarded to
    ``cross_val_score`` (previously keyword arguments such as ``cv=`` were
    accepted but silently discarded). ``scoring`` is always the
    module-level ``mse`` scorer; a caller-supplied ``scoring`` is
    overridden, as before.

    Returns:
        The mean of the element-wise square roots of the fold scores.
    """
    forwarded = {**kwargs, "scoring": mse}
    fold_scores = cross_val_score(*args, **forwarded)
    return np.mean(np.sqrt(fold_scores))


# %%baseline
kf = KFold(N_FOLD)

# Plain linear regression, RMSE per fold.
# NOTE(review): the printed labels say "5fold" while the split uses N_FOLD.
lr_rmse = []
for train_index, test_index in kf.split(X_train):
    lr_X_train, lr_X_test = X_train.iloc[train_index], X_train.iloc[test_index]
    lr_y_train, lr_y_test = y_train.iloc[train_index], y_train.iloc[test_index]
    lr = MyLinearRegression()
    lr.fit(lr_X_train, lr_y_train)
    lr_rmse.append(np.sqrt(mean_squared_error(
        lr.predict(lr_X_test), lr_y_test)))
print("LR 5fold RMSE ", np.mean(lr_rmse))

# n = 2 expansion: same procedure with poly_degree=2.
lr_rmse = []
for train_index, test_index in kf.split(X_train):
    lr_X_train, lr_X_test = X_train.iloc[train_index], X_train.iloc[test_index]
    lr_y_train, lr_y_test = y_train.iloc[train_index], y_train.iloc[test_index]
    lr = MyLinearRegression(poly_degree=2)
    lr.fit(lr_X_train, lr_y_train)
    lr_rmse.append(np.sqrt(mean_squared_error(
        lr.predict(lr_X_test), lr_y_test)))
print("LR, n=2 expansion, 5fold RMSE ", np.mean(lr_rmse))
import numpy as np
from my_linear_regression import MyLinearRegression as MyLR

if __name__ == "__main__":
    # Manual check script: each example prints the model's result followed
    # by a hard-coded reference value for visual comparison.
    X = np.array([
        [1., 1., 2., 3.],
        [5., 8., 13., 21.],
        [34., 55., 89., 144.],
    ])
    Y = np.array([
        [23.],
        [48.],
        [218.],
    ])
    mylr = MyLR([[1.], [1.], [1.], [1.], [1]])

    print("# Example 0:")
    print(mylr.predict(X))
    print("# Output:", "array([[8.], [48.], [323.]])", "", sep="\n")

    print("# Example 1:")
    print(mylr.cost_elem_(X, Y))
    print("# Output:", "array([[37.5], [0.], [1837.5]])", "", sep="\n")

    print("# Example 2:")
    print(mylr.cost_(X, Y))
    print("# Output:", 1875.0, "", sep="\n")

    print("# Example 3:")
    mylr.fit_(X, Y)
    print(mylr.theta)
    print("# Output:")
y = dataset.iloc[:, 1].values

# Splitting the dataset into the Training set and Test set
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=1 / 3, random_state=0)

# Fitting Simple Linear Regression to the Training set.
# Train on the training split only: fitting on the full (X, y) would leak
# the held-out test samples into the model evaluated below.
regressor = MyLinearRegression()
regressor.train(X_train, y_train)
print(regressor.weight)
print(regressor.bias)

# Predicting the Test set results
y_pred = regressor.predict(X_test)

# Visualising the Training set results
plt.scatter(X_train, y_train, color='red')
plt.plot(X_train, regressor.predict(X_train), color='blue')
plt.title('Salary vs Experience (Training set)')
plt.xlabel('Years of Experience')
plt.ylabel('Salary')
plt.show()

# Visualising the cost recorded by the regressor during training
plt.plot(regressor.cost_trend, color='blue')
plt.title('Cost Flow')
plt.xlabel('Iterations')
plt.ylabel('Cost')
plt.show()