def lasso_test(X, y, degree=1):
    # Fit a polynomial Lasso regression with batch gradient descent and report its test score.
    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
    lasso_reg = LassoRegression(degree=degree)
    lasso_reg.fit(X_train, y_train, lasso=True, method="bgd")
    print(lasso_reg.score(X_test, y_test))
    # Compare the first five predictions with their true values.
    X_test = X_test[:5]
    y_predict = lasso_reg.predict(X_test)
    y_true = y_test[:5]
    for i in range(len(y_true)):
        print(y_true[i], y_predict[i])
    print()
def poly_test(X, y, degree=1):
    # Fit a plain polynomial regression (no L1 penalty) via the normal equation for comparison.
    X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)
    poly_reg = LassoRegression(degree=degree)
    poly_reg.fit(X_train, y_train, lasso=False, method="normal")
    print(poly_reg.score(X_test, y_test))
    X_test = X_test[:5]
    y_predict = poly_reg.predict(X_test)
    y_true = y_test[:5]
    for i in range(len(y_true)):
        print(y_true[i], y_predict[i])
    print()
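# A hedged usage sketch for the two helpers above. It assumes the repo's
# model_selection.train_test_split and LassoRegression class are importable in this file;
# the Boston housing setup mirrors the other regression scripts in this collection.
if __name__ == '__main__':
    from sklearn import datasets

    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    # Drop the capped target values, as in the other regression examples.
    X = X[y < 50.0]
    y = y[y < 50.0]

    lasso_test(X, y, degree=2)
    poly_test(X, y, degree=2)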
y = iris.target

# x_train, x_test, y_train, y_test = train_test_split(x, y)
#
# my_knn = KNNClassifier(k=3)
# my_knn.fit(x_train, y_train)
# y_predict = my_knn.predict(x_test)
# print(y_test)
# print(y_predict)
# print(sum(y_predict == y_test) / len(y_test))

#########################################################
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# kNN_classifier = KNeighborsClassifier(n_neighbors=3)
# kNN_classifier.fit(x_train, y_train)
# y_predict = kNN_classifier.predict(x_test)
#
# print(sum(y_predict == y_test) / len(y_test))


# Hand-rolled grid search over k and p for the best test-set score.
# best_score = 0.0
# best_k = -1
# best_p = -1
#
# for k in range(1, 11):
#     for p in range(1, 6):
#         # Tuning p (the Minkowski exponent) only makes sense if distance matters, so weights="distance".
#         knn_clf = KNeighborsClassifier(n_neighbors=k, weights="distance", p=p)
#         knn_clf.fit(x_train, y_train)
#         score = knn_clf.score(x_test, y_test)
#         if score > best_score:
#             best_score = score
#             best_k = k
#             best_p = p
#
# print("best_k =", best_k)
# print("best_p =", best_p)
# print("best_score =", best_score)
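# The commented-out double loop above is a hand-rolled grid search. A hedged alternative
# sketch using sklearn's GridSearchCV over the same k/p grid (assumes x_train/y_train
# from the split above):
from sklearn.model_selection import GridSearchCV

param_grid = [{
    "n_neighbors": list(range(1, 11)),
    "weights": ["distance"],
    "p": list(range(1, 6)),
}]
grid_search = GridSearchCV(KNeighborsClassifier(), param_grid, cv=5)
grid_search.fit(x_train, y_train)
print(grid_search.best_params_)
print(grid_search.best_score_)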
from KNN import KNNClassifier
from model_selection import train_test_split
from metric import accuracy_score
import pandas as pd
import numpy as np

if __name__ == '__main__':
    # Load the raw iris data and keep sepal length and petal length (columns 0 and 2) as features.
    iris = pd.read_csv('../iris.data', header=None)
    iris_data = iris.loc[:, :].values
    x_data = iris_data[:, [0, 2]]
    y_data = iris_data[:, 4]
    x_predict = np.array([[5.1, 2.1]])

    x_train, y_train, x_test, y_test = train_test_split(x_data, y_data)

    # Search k = 1..10 for the best test-set accuracy.
    best_score = 0.0
    best_k = -1
    for k in range(1, 11):
        knn_clf = KNNClassifier(n_neighbors=k)
        knn_clf.fit(x_train, y_train)
        score = knn_clf.score(x_test, y_test)
        if score > best_score:
            best_k = k
            best_score = score
    print("best_k =", best_k)
    print('best_score =', best_score)
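# The KNNClassifier above is this repo's own KNN module, which is not shown here. The
# following is only a hedged, minimal sketch of what such a classifier typically does
# (brute-force Euclidean distances plus a majority vote); it is not the repo's implementation.
from collections import Counter

class SimpleKNN:
    def __init__(self, n_neighbors=3):
        self.n_neighbors = n_neighbors

    def fit(self, X, y):
        # kNN is lazy: fitting just memorizes the training data.
        self._X = np.asarray(X, dtype=float)
        self._y = np.asarray(y)
        return self

    def predict(self, X):
        preds = []
        for x in np.asarray(X, dtype=float):
            distances = np.sqrt(np.sum((self._X - x) ** 2, axis=1))
            nearest = np.argsort(distances)[:self.n_neighbors]
            votes = Counter(self._y[nearest])
            preds.append(votes.most_common(1)[0][0])
        return np.array(preds)

    def score(self, X, y):
        return np.mean(self.predict(X) == np.asarray(y))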
# plt.show()

# Iris logistic classification (binary: the first two classes, first two features).
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets

iris = datasets.load_iris()
X = iris.data
y = iris.target
X = X[y < 2, :2]
y = y[y < 2]

from model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, seed=666)

from LogisticRegression import LogisticRegression
log_reg = LogisticRegression()
log_reg.fit(X_train, y_train)

# print(log_reg.score(X_test, y_test))  # 1.0
#
# print(log_reg.predict_proba(X_test))
# # [0.92972035 0.98664939 0.14852024 0.01685947 0.0369836  0.0186637
# #  0.04936918 0.99669244 0.97993941 0.74524655 0.04473194 0.00339285
# #  0.26131273 0.0369836  0.84192923 0.79892262 0.82890209 0.32358166
# #  0.06535323 0.20735334]
#
# print(log_reg.coef_)       # [ 3.01796521 -5.04447145]
# print(log_reg.intercept_)  # -0.6937719272911225
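# With two features, the fitted boundary is the line where w1*x1 + w2*x2 + b = 0, i.e. the
# points whose sigmoid output is exactly 0.5. A hedged plotting sketch using the attributes
# printed above (assumes log_reg.coef_ holds two weights and log_reg.intercept_ is a scalar):
def plot_decision_boundary(model, X, y):
    w1, w2 = model.coef_
    b = model.intercept_
    x1 = np.linspace(X[:, 0].min(), X[:, 0].max(), 100)
    x2 = -(w1 * x1 + b) / w2  # solve w1*x1 + w2*x2 + b = 0 for x2
    plt.scatter(X[y == 0, 0], X[y == 0, 1], color='red')
    plt.scatter(X[y == 1, 0], X[y == 1, 1], color='blue')
    plt.plot(x1, x2, color='green')
    plt.show()

plot_decision_boundary(log_reg, X_train, y_train)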
from kNN import kNNClassifier
from model_selection import train_test_split
from sklearn import datasets
from metrics import accuracy_score
from preprocessing import StandardScaler

# Load the iris data.
iris = datasets.load_iris()
X = iris.data
y = iris.target

# Split the data into train and test sets.
X_train, X_test, y_train, y_test = train_test_split(X, y, 0.2)

# Standardize the data: fit the scaler on the training set only, then transform both sets.
standardScaler = StandardScaler()
standardScaler.fit(X_train)
X_train = standardScaler.transform(X_train)
X_test = standardScaler.transform(X_test)
print(X_test)

# Train and predict on the standardized data.
kcf = kNNClassifier(3)
kcf.fit(X_train, y_train)
y_predict = kcf.predict(X_test)

# Compute the prediction accuracy.
accuracy = accuracy_score(y_test, y_predict)
print(accuracy)
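# The StandardScaler above comes from the repo's own preprocessing module. As a hedged sketch
# of the underlying technique (not the repo's code): standardization stores the per-feature
# mean and standard deviation of the training set and maps each value x to (x - mean) / std.
import numpy as np

class MinimalStandardScaler:
    def fit(self, X):
        X = np.asarray(X, dtype=float)
        self.mean_ = X.mean(axis=0)
        self.scale_ = X.std(axis=0)
        return self

    def transform(self, X):
        return (np.asarray(X, dtype=float) - self.mean_) / self.scale_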
from linear_regression import MultipleLinearRegression
from metric import mean_squared_error
from metric import mean_absolute_error
from metric import root_mean_squared_error
from metric import r2_score
from model_selection import train_test_split  # assumed: the repo's own split, as in the other scripts
from sklearn import datasets

if __name__ == '__main__':
    # Multiple linear regression on the Boston housing data via the normal equation.
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    # Drop the capped target values (y == 50) as noise.
    X = X[y < 50.0]
    y = y[y < 50.0]

    X_train, y_train, X_test, y_test = train_test_split(X, y, seed=666)

    reg = MultipleLinearRegression()
    reg.fit_normal(X_train, y_train)
    print(reg.coef_, reg.bias_)
    print(reg.score(X_test, y_test))

"""
if __name__ == '__main__':
    boston = datasets.load_boston()
    print(boston.feature_names)

    x = boston.data[:, 5]
    y = boston.target
    x = x[y < 50.0]
    y = y[y < 50.0]
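# fit_normal solves the least-squares problem in closed form. A hedged sketch of the
# technique (not necessarily the repo's MultipleLinearRegression): augment X with a column
# of ones and compute theta = (X_b^T X_b)^(-1) X_b^T y; the first entry is the intercept.
import numpy as np

def fit_normal_equation(X, y):
    X_b = np.hstack([np.ones((len(X), 1)), X])
    theta = np.linalg.pinv(X_b.T @ X_b) @ X_b.T @ y  # pinv is more stable than a plain inverse
    bias, coef = theta[0], theta[1:]
    return coef, bias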
from regression.logistic_regression import LogisticRegression
from metric import mean_squared_error
from metric import mean_absolute_error
from metric import root_mean_squared_error
from metric import r2_score
from linear_regression import MultipleLinearRegression  # assumed: same module as the previous script
from model_selection import train_test_split            # assumed: the repo's own split
from sklearn import datasets
import pandas as pd

if __name__ == '__main__':
    # Multiple linear regression on the Boston housing data (same setup as the previous script).
    boston = datasets.load_boston()
    X = boston.data
    y = boston.target
    X = X[y < 50.0]
    y = y[y < 50.0]

    X_train, y_train, X_test, y_test = train_test_split(X, y, seed=666)

    reg = MultipleLinearRegression()
    reg.fit_normal(X_train, y_train)
    print(reg.coef_, reg.bias_)
    print(reg.score(X_test, y_test))

    # Test with the iris data: keep the first two classes and encode their labels as 0/1.
    iris = pd.read_csv('../iris.data', header=None)
    iris_data = iris.loc[:, :].values
    x_data = iris_data[:100, :2]
    y_data = iris_data[:100, 4]
    y_data[y_data == 'Iris-setosa'] = 0
    y_data[y_data == 'Iris-versicolor'] = 1

    x_train, y_train, x_test, y_test = train_test_split(x_data, y_data)
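# The imported LogisticRegression is the repo's own class, and the excerpt stops right after
# the iris split. As a hedged sketch of the underlying technique (not the repo's
# implementation): binary logistic regression applies the sigmoid to a linear score and fits
# the weights by gradient descent on the mean cross-entropy loss.
import numpy as np

def sigmoid(t):
    return 1.0 / (1.0 + np.exp(-t))

def fit_logistic_gd(X, y, eta=0.01, n_iters=10000):
    X = np.asarray(X, dtype=float)
    y = np.asarray(y, dtype=float)
    X_b = np.hstack([np.ones((len(X), 1)), X])   # prepend a bias column
    theta = np.zeros(X_b.shape[1])
    for _ in range(n_iters):
        p = sigmoid(X_b @ theta)
        gradient = X_b.T @ (p - y) / len(y)      # gradient of the mean cross-entropy
        theta -= eta * gradient
    return theta[0], theta[1:]                   # intercept, coefficients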
# Use the number of rooms (column 5, RM) as the single feature.
x = boston.data[:, 5]
y = boston.target
plt.scatter(x, y)
plt.show()

# Remove some noise: the target is capped at 50, so drop those samples.
x = x[y < 50]
y = y[y < 50]
plt.scatter(x, y)
plt.show()

x_train, x_test, y_train, y_test = train_test_split(x, y, seed=666)
print(x_train.shape)

# Fit a simple (one-feature) linear regression and plot the fitted line over the training data.
reg = SimpleLinearRegression2()
reg.fit(x_train, y_train)
plt.scatter(x_train, y_train)
plt.plot(x_train, reg.predict(x_train), color='r')
plt.show()

y_predict = reg.predict(x_test)

# MSE on the test set.
mse_test = np.sum((y_test - y_predict) ** 2) / len(y_test)
print(mse_test)
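# The metric module imported in the other scripts exposes MSE, RMSE, MAE and R^2. A hedged
# sketch of those formulas in plain numpy, applied to the test predictions above:
rmse_test = np.sqrt(mse_test)                                # root mean squared error
mae_test = np.sum(np.abs(y_test - y_predict)) / len(y_test)  # mean absolute error
r2_test = 1 - mse_test / np.var(y_test)                      # R^2 = 1 - MSE / Var(y)
print(rmse_test, mae_test, r2_test)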