"""Unary linear regression demo: fit y = theta0 + theta1*x by gradient descent.

Reads (x, y) samples from resources/data1.txt, fits the parameters with the
project-local `func` helpers, and plots the samples with the fitted line.
"""
import func
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

if __name__ == "__main__":
    print("读取示例数据....")
    # The data file has no header row; name the two columns explicitly.
    data1 = pd.read_csv('resources/data1.txt', header=None, names=['x', 'y'])
    x = data1['x']
    y = data1['y']
    m = y.shape[0]  # number of training samples
    # Prepend a row of ones so theta[0] acts as the intercept in the matrix
    # form of the hypothesis. (np.vstack is the non-deprecated spelling of
    # np.row_stack, which NumPy 2.x removes.)
    x = np.vstack((np.ones(m), x))
    theta = np.zeros(x.shape[0])  # one weight per feature row, incl. bias
    print("初始cost值: ", str(func.compute_cost(x, y, theta)))
    print("开始进行梯度下降...")
    theta = func.gradient_decent(x, y, theta)
    print('最终拟合图:')
    # Scatter of the raw samples (x is row 1; row 0 is the bias).
    plt.plot(x[1, :], y, 'r*')
    plt.xlabel('x')
    plt.ylabel('y')
    plt.title('unary linear regression')
    plt.grid(True)
    # Fitted line: hypothesis X^T . theta evaluated at the sample points.
    plt.plot(x[1, :], x.T.dot(theta))
    plt.legend(['First', 'second'], loc=2)
    plt.show()
"""Univariate linear regression on ex1data1.txt (population vs. profit).

Plots the raw data, runs gradient descent via the project-local `func`
helpers, and plots the cost curve over the iterations.
"""
import numpy as np
import matplotlib.pyplot as plt
from func import compute_cost, gradient_descent, predict

# First task: load the training set; column 0 is the feature, column 1 the target.
data = np.matrix(np.loadtxt('ex1data1.txt', delimiter=','))
X = data[:, 0]
y = data[:, 1]

# Second task: scatter plot of the raw samples.
plt.plot(X, y, 'g.')
plt.title('Зависимость прибыльности от численности')
plt.xlabel('Численность')
plt.ylabel('Прибыльность')
plt.show()

# Cost function: prepend a ones column so theta[0] is the intercept.
m = X.shape[0]
X_ones = np.c_[np.ones((m, 1)), X]
theta = np.matrix('[1; 2]')
print(compute_cost(X_ones, y, theta))

# Call method gradient_descent: learning rate 0.02, 500 iterations.
theta, J_th = gradient_descent(X_ones, y, 0.02, 500)
print(theta)

# Cost change while gradient descent runs.
plt.plot(np.arange(500), J_th, 'k-')
plt.title('Снижение ошибки при градиентном спуске')
plt.xlabel('Итерация')
plt.ylabel('Ошибка')
plt.grid()
plt.show()

# Call method predict.
# NOTE(review): `test` is created but its feature values are never filled in
# here (unlike the sibling ex1data2 script) — looks unfinished; confirm intent.
test = np.ones((2, 2))
# Multivariate linear regression (house size, rooms -> price) by gradient descent.
# NOTE(review): `data` is not defined in this span — presumably it is loaded
# earlier in the file (columns: square, rooms, price); confirm against the
# surrounding code.
square = data[:, 0]
room = data[:, 1]
price = data[:, 2]
# Normalize the data (feature scaling).
# NOTE(review): the normalized `square`/`room` are never used below — the raw
# columns data[:, 0:2] are put into X instead, and only the normalized price
# reaches the model. Looks like a latent bug; confirm before changing.
square = normalize(square)
room = normalize(room)
price = normalize(price)
X = data[:, 0:2]
# Prepend a ones column so theta[0] acts as the intercept.
X_ones = np.c_[np.ones((X.shape[0], 1)), X]
y = price
theta = np.matrix('[1; 2; 3]')
# Compute the cost of the starting weights.
primary_cost = compute_cost(X_ones, y, theta)
print('initial cost -> ' + str(primary_cost))
# Gradient descent; the tiny learning rate (2e-9) compensates for the
# unnormalized feature columns in X.
theta, J_th = gradient_descent(X_ones, y, 0.000000002, 1000)
# Cost curve across the 1000 iterations.
plt.plot(np.arange(1000), J_th, 'k-')
plt.title('Снижение ошибки при градиентном спуске')
plt.xlabel('Итерация')
plt.ylabel('Ошибка')
plt.grid()
plt.show()
# Learned weights.
print('weights:')
print(theta)
"""Closed-form (normal-equation) solution for multivariate linear regression.

Solves theta = (X^T X)^+ X^T y on ex1data2.txt, reports the resulting cost,
and predicts the price of two query houses.
"""
import numpy as np
from func import compute_cost, gradient_descent, predict

# Load the training set and build the design matrix with a bias column of ones.
data = np.matrix(np.loadtxt('ex1data2.txt', delimiter=','))
features = data[:, 0:2]
X = np.c_[np.ones((features.shape[0], 1)), features]
y = data[:, 2]

# Nine task: normal equation via the Moore-Penrose pseudo-inverse of X^T X.
gram_inverse = np.linalg.pinv(X.T.dot(X))
theta = gram_inverse.dot(X.T).dot(y)
print(theta)

# Training cost of the closed-form weights.
cost = compute_cost(X, y, theta)
print(cost)

# Two query rows: bias term (column 0), then the two feature values.
queries = np.ones((2, 3))
queries[0][1] = 272000
queries[1][1] = 314000
queries[0][2] = 2
queries[1][2] = 3
print('prediction ->' + str(predict(queries, theta)))
"""Multivariate linear regression on data2.txt with a 3D surface of the fit."""
import func
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

data2 = pd.read_csv('resources/data2.txt', header=None)
# Rows 0-1 of the transposed frame are the two feature rows (one column per
# sample); a range is required here — a bare data2.T[2] would select a column.
x = data2.T[0:2].values
x = func.normalize_feature(x)
# Row 2 of the transposed frame is the target. `.ix` was removed in pandas 1.0;
# positional row access is spelled `.iloc` now.
y = data2.T.iloc[2].values
print(y)
m = y.shape[0]  # number of samples (len() here would count rows, not elements)
# Prepend a row of ones so theta[0] acts as the intercept.
# Equivalent to np.r_[np.ones((1, m)), x]. (np.vstack is the non-deprecated
# spelling of np.row_stack, which NumPy 2.x removes.)
x = np.vstack((np.ones(m), x))
print(y.shape)
theta = np.zeros(x.shape[0])  # one weight per feature row, including the bias
print("初始cost:", func.compute_cost(x, y, theta))
print("开始梯度下降....")
theta = func.gradient_decent(x, y, theta)
print("最终拟合图3D")
fig = plt.figure()
# Direct Axes3D(fig) no longer attaches the axes to the figure on modern
# matplotlib; add_subplot(projection='3d') is the supported way.
ax = fig.add_subplot(projection='3d')
X = np.arange(0, 1, 0.01)
Y = np.arange(0, 1, 0.01)
X, Y = np.meshgrid(X, Y)  # expand the two vectors into coordinate grids


def f(x, y):
    """Fitted plane: theta0 + theta1*x + theta2*y."""
    return theta[0] + theta[1] * x + theta[2] * y


ax.plot_surface(X, Y, f(X, Y), rstride=1, cstride=1)