Example #1
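Fits a custom linear-regression model to a happiness dataset, prints the fitted formula and its test-set mean squared error, plots the data split, model, and test results for one- and two-feature problems, and cross-checks the result against scikit-learn's LinearRegression.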
    def solve_linear_regression(self):
        nr_features = len(self.train_inputs[0])
        # for problems with 1 or 2 features, plot the train/test split
        if nr_features == 1:
            plot_data_split_simple(self.train_inputs, self.train_outputs,
                                   self.test_inputs, self.test_outputs,
                                   [self.input_features[0], "Happiness"])
        elif nr_features == 2:
            plot_data_split_multiple(
                self.train_inputs, self.train_outputs, self.test_inputs,
                self.test_outputs,
                [self.input_features[0], self.input_features[1], "Happiness"])
        # fit the model
        regression = MyLinearRegression()
        regression.fit(self.train_inputs, self.train_outputs)
        b = regression.b
        f = "f(x) = " + str(regression.intercept)
        for i in range(len(b)):
            f += " + " + str(b[i]) + "*x" + str(i + 1)
        print("model: " + f)
        # evaluate the model on the test data
        computed_test_results = regression.predict(self.test_inputs)
        print("prediction error: " +
              str(self.mean_square_error(computed_test_results)))

        # for problems with 1 or 2 features, plot the model and the test results
        if nr_features == 1:
            plot_model_simple(self.train_inputs, self.train_outputs, b[0],
                              regression.intercept,
                              [self.input_features[0], "Happiness"])
            plot_test_results_simple(self.test_inputs, self.test_outputs,
                                     computed_test_results,
                                     [self.input_features[0], "Happiness"])
        elif nr_features == 2:
            plot_model_multiple(
                self.train_inputs, self.train_outputs, b[1], b[0],
                regression.intercept,
                [self.input_features[0], self.input_features[1], "Happiness"])
            plot_test_results_multiple(
                self.test_inputs, self.test_outputs, computed_test_results,
                [self.input_features[0], self.input_features[1], "Happiness"])

        # compare with sklearn results
        regression_sk = LinearRegression()
        regression_sk.fit(self.train_inputs, self.train_outputs)
        b = regression_sk.coef_
        f = "f(x) = " + str(regression_sk.intercept_)
        for i in range(len(b)):
            f += " + " + str(b[i]) + "*x" + str(i + 1)
        print("model sk: " + f)
        computed_test_results_sk = regression_sk.predict(self.test_inputs)
        print("prediction error sk: " +
              str(self.mean_square_error(computed_test_results_sk)))
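MyLinearRegression itself is not included in any of these excerpts, and its interface differs between them (fit/predict here, fit_/predict_/cost_ in Example #2, a theta/alpha/n_cycle constructor in Examples #3 and #6). A minimal sketch of the interface Example #1 relies on, assuming an ordinary least-squares fit; the project's real implementation is not shown:

import numpy as np

class MyLinearRegression:
    # hypothetical minimal implementation, for illustration only
    def __init__(self):
        self.intercept = 0.0
        self.b = []  # one coefficient per input feature

    def fit(self, inputs, outputs):
        # least-squares solution with a bias column prepended
        X = np.column_stack([np.ones(len(inputs)), np.asarray(inputs, dtype=float)])
        w, *_ = np.linalg.lstsq(X, np.asarray(outputs, dtype=float), rcond=None)
        self.intercept, self.b = w[0], list(w[1:])

    def predict(self, inputs):
        return [self.intercept + sum(c * x for c, x in zip(self.b, row))
                for row in inputs]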
Example #2
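Fits polynomial models of degree 2 through 10 to a small synthetic dataset, plots each fitted curve over the data points, then compares the training costs of all degrees in a bar chart.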
import numpy as np
import matplotlib.pyplot as plt

from my_linear_regression import MyLinearRegression
from polynomial_model import add_polynomial_features

if __name__ == "__main__":
    x = np.arange(1, 11).reshape(-1, 1)
    y = np.array([[1.39270298], [3.88237651], [4.37726357], [4.63389049],
                  [7.79814439], [6.41717461], [8.63429886], [8.19939795],
                  [10.37567392], [10.68238222]])
    plt.scatter(x, y)
    plt.show()

    # fit polynomials of degree 2..10 and record each model's training cost
    degrees = list(range(2, 11))
    costs = np.zeros(len(degrees))
    for idx, degree in enumerate(degrees):
        x_ = add_polynomial_features(x, degree)
        my_lr = MyLinearRegression(np.ones(degree + 1).reshape(-1, 1))
        my_lr.fit_(x_, y)
        costs[idx] = my_lr.cost_(my_lr.predict_(x_), y)

        # plot the fitted curve on a dense grid over the data range
        continuous_x = np.arange(1, 10.01, 0.01).reshape(-1, 1)
        x_ = add_polynomial_features(continuous_x, degree)
        y_hat = my_lr.predict_(x_)

        plt.scatter(x, y)
        plt.plot(continuous_x, y_hat, color='orange')
        plt.show()

    # compare training costs across degrees
    plt.bar(degrees, costs, color='orange')
    plt.show()
    print(costs)
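add_polynomial_features is imported from polynomial_model but not shown. A plausible version, assuming it simply stacks the powers of the input column side by side (so degree i yields i feature columns, matching the i + 1 theta values above):

import numpy as np

def add_polynomial_features(x, power):
    # returns the columns x, x**2, ..., x**power side by side
    return np.hstack([x ** p for p in range(1, power + 1)])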
Example #3
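Prediction script: reads features from a dataset or from command-line values, optionally restores a saved model and its min-max scalers, prints the predictions (also unscaled back to the original units when a saved state was loaded), and can plot the results.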
		if df.shape[1] > 1:
			X = np.array(df.iloc[:, 0:-1]).reshape(-1, len(df.columns) - 1)
			Y = np.array(df.iloc[:, -1]).reshape(-1, 1)
		else:
			X = np.array(df.iloc[:, :])
			print("Dataset has no result column; if visualization is requested, an array of zeros will be used")
			Y = np.zeros_like(X)
	else:
		X = ARGS.values
		Y = np.zeros_like(X)
	if ARGS.load:
		pkl = DataHandler(ARGS)
		PreP_x, PreP_y, theta = pkl.load()
		X = PreP_x.re_apply_minmax(X)
		Y = PreP_y.re_apply_minmax(Y)
		if X is None or Y is None:
			sys.exit()
	else:
		theta = [0] * (X.shape[1] + 1)

	print("Theta is: ", theta)

	lr = MyLinearRegression(theta, visual=ARGS.visual)
	value = lr.predict(X)
	print("Predicted value(s):\n", value)
	if ARGS.load:
		print("\twithout preprocessing:\n", PreP_y.unapply_minmax(value))

	if ARGS.visual:
		lr.plot_results(X, Y)
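Preprocessing and DataHandler belong to the surrounding project and are not shown. Below is a guess at the min-max interface these scripts call (the scaler flag, the data attribute, re_apply_minmax, unapply_minmax); a sketch only, not the project's actual code:

import numpy as np

class Preprocessing:
    # hypothetical min-max scaler matching the calls in Examples #3 and #6;
    # the project's real version apparently returns None on invalid input
    # (the callers check for it and exit)
    def __init__(self, data, scaler=True):
        self.scaler = scaler
        self.min = data.min(axis=0)
        self.max = data.max(axis=0)
        self.data = self.re_apply_minmax(data) if scaler else data

    def re_apply_minmax(self, data):
        # rescale new data with the ranges learned at construction time
        return (data - self.min) / (self.max - self.min)

    def unapply_minmax(self, data):
        # map scaled values back to the original units
        return data * (self.max - self.min) + self.min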
Example #4
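Merges user ratings with teleplay metadata, evaluates MyLinearRegression with 5-fold cross-validated RMSE, and begins defining a Keras MLP as an alternative model.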
# assumed imports for this excerpt (not shown in the original snippet);
# user_rating and teleplay are DataFrames loaded earlier in the notebook
import numpy as np
import pandas as pd
import tensorflow as tf
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error
from my_linear_regression import MyLinearRegression  # module path assumed

merged = pd.merge(user_rating, teleplay, on="teleplay_id")

# %%
train = merged[merged["rating_x"] != -1]  # rated movies
y_train = train["rating_x"]
X_train = train.drop(["rating_x", "user_id"], axis=1)
# %%model evaluation
kf = KFold(5)
# lr = Lasso()  # alternative: sklearn implementation as a reference benchmark

lr_rmse = []
for train_index, test_index in kf.split(X_train):
    lr_X_train, lr_X_test = X_train.iloc[train_index], X_train.iloc[test_index]
    lr_y_train, lr_y_test = y_train.iloc[train_index], y_train.iloc[test_index]
    lr = MyLinearRegression()
    # lr = MyLinearRegression(poly_degree=2)

    lr.fit(lr_X_train, lr_y_train)
    lr_rmse.append(np.sqrt(mean_squared_error(
        lr.predict(lr_X_test), lr_y_test)))

print("LR, 5fold RMSE ", np.mean(lr_rmse))

# %%mlp
mlp = tf.keras.models.Sequential()
mlp.add(tf.keras.layers.Input([48, ]))
mlp.add(tf.keras.layers.BatchNormalization())
mlp.add(tf.keras.layers.Dense(400, activation="sigmoid"))
mlp.add(tf.keras.layers.BatchNormalization())
mlp.add(tf.keras.layers.Dropout(0.4))
Example #5
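Evaluates baseline MyLinearRegression and a degree-2 polynomial expansion with K-fold RMSE, and defines an rmse_cv helper for hyper-parameter searching.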
# assumed imports for this excerpt (not shown in the original snippet);
# X_train and y_train are prepared earlier, as in Example #4
import numpy as np
from sklearn.model_selection import KFold, cross_val_score
from sklearn.metrics import make_scorer, mean_squared_error
from my_linear_regression import MyLinearRegression  # module path assumed

N_FOLD = 5  # assumed value: the print below labels the result "5fold"
OPT_N_FOLD = 3  # used for hyper-param searching
SEED = 4434
mse = make_scorer(mean_squared_error)


def rmse_cv(*args, **kwargs):
    # forward extra arguments (e.g. cv=OPT_N_FOLD) to cross_val_score
    return np.mean(np.sqrt(cross_val_score(*args, scoring=mse, **kwargs)))


# %%baseline
kf = KFold(N_FOLD)
lr_rmse = []
for train_index, test_index in kf.split(X_train):
    lr_X_train, lr_X_test = X_train.iloc[train_index], X_train.iloc[test_index]
    lr_y_train, lr_y_test = y_train.iloc[train_index], y_train.iloc[test_index]
    lr = MyLinearRegression()
    lr.fit(lr_X_train, lr_y_train)
    lr_rmse.append(np.sqrt(mean_squared_error(
        lr.predict(lr_X_test), lr_y_test)))

print("LR 5fold RMSE ", np.mean(lr_rmse))

# degree-2 polynomial feature expansion
lr_rmse = []
for train_index, test_index in kf.split(X_train):
    lr_X_train, lr_X_test = X_train.iloc[train_index], X_train.iloc[test_index]
    lr_y_train, lr_y_test = y_train.iloc[train_index], y_train.iloc[test_index]
    lr = MyLinearRegression(poly_degree=2)
    lr.fit(lr_X_train, lr_y_train)
    lr_rmse.append(np.sqrt(mean_squared_error(
        lr.predict(lr_X_test), lr_y_test)))
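The fold loop above is repeated verbatim for each model variant (and again in Example #4). One way to factor it out, as a sketch rather than part of the original code:

import numpy as np
from sklearn.model_selection import KFold
from sklearn.metrics import mean_squared_error

def kfold_rmse(make_model, X, y, n_folds=5):
    # mean RMSE of a freshly constructed model across the folds
    scores = []
    for train_index, test_index in KFold(n_folds).split(X):
        model = make_model()
        model.fit(X.iloc[train_index], y.iloc[train_index])
        pred = model.predict(X.iloc[test_index])
        scores.append(np.sqrt(mean_squared_error(y.iloc[test_index], pred)))
    return np.mean(scores)

# usage: kfold_rmse(lambda: MyLinearRegression(poly_degree=2), X_train, y_train)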
Example #6
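Training script: scales X and Y with min-max preprocessing (or restores saved scalers when ARGS.load is set), fits MyLinearRegression with the supplied alpha and n_cycle hyper-parameters, saves the scalers and the learned theta, and optionally plots the results.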
    Y = np.array(df.iloc[:, -1]).reshape(-1, 1)

    pkl = DataHandler(ARGS)

    if ARGS.load:
        PreP_x, PreP_y, theta = pkl.load()
        if PreP_x.scaler:
            X = PreP_x.re_apply_minmax(X)
        if PreP_y.scaler:
            Y = PreP_y.re_apply_minmax(Y)
        if X is None or Y is None:
            sys.exit()
    else:
        PreP_x = Preprocessing(X, scaler=ARGS.scaler)
        PreP_y = Preprocessing(Y, scaler=ARGS.scaler)
        X = PreP_x.data
        Y = PreP_y.data
        theta = [1] * (X.shape[1] + 1)

    lr = MyLinearRegression(theta,
                            alpha=ARGS.alpha,
                            n_cycle=ARGS.n_cycle,
                            visual=ARGS.visual)
    err = lr.fit(X, Y)
    if err is None:
        sys.exit()

    pkl.save(PreP_x, PreP_y, lr.theta)
    if ARGS.visual:
        lr.plot_results(X, Y)
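Examples #3 and #6 form the two halves of one pipeline: Example #6 fits the model and saves the scalers and theta through DataHandler, and Example #3 restores them (when ARGS.load is set) so that new predictions can be made and unscaled back to the original units.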