def main():
    # Load the iris dataset and scale the features. (KNN, normalize_scaler
    # and the split/metric helpers are assumed to be imported elsewhere in
    # the repo.)
    data = datasets.load_iris()
    X = normalize_scaler(data.data)
    y = data.target
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)
    # Fit a k-nearest-neighbors classifier and report its test accuracy
    clf = KNN(k=5)
    y_pred = clf.predict(X_test, X_train, y_train)
    accuracy = accuracy_score(y_test, y_pred)
    print("Accuracy:", accuracy)
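# normalize_scaler above is a repo-local helper that is not shown here.
# A minimal sketch, assuming it standardizes each feature column to zero
# mean and unit variance (the actual repo helper may behave differently):
import numpy as np

def normalize_scaler(X):
    """Hypothetical sketch: column-wise standardization of X."""
    X = np.asarray(X, dtype=float)
    return (X - X.mean(axis=0)) / (X.std(axis=0) + 1e-12)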
def main(): print("Testing the performance of RegressionTree...") # Load data X, y = load_boston_house_prices() # Split data randomly, train set rate 70% x_train, x_test, y_train, y_test = train_test_split(X, y, random_state=10) # Train model reg = RegressionTree() reg.fit(X=x_train, y=y_train, max_depth=5) # Show rules reg.rules # Model evaluation get_r2(reg, x_test, y_test)
def main(): print("Tesing the accuracy of GBDT regressor...") boston = load_boston() X = list(boston.data) y = list(boston.target) X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10) reg = GradientBoostingRegressor() reg.fit(X=X_train, y=y_train, n_estimators=4, lr=0.5, max_depth=2, min_samples_split=2) get_r2(reg, X_test, y_test)
            # (fragment of the score method: tally correct predictions)
            s += 1
            if self._predict(x_test[i]) == y_test[i]:
                right += 1
        return right / s

    def __repr__(self):
        return "KNN(k = %d)" % self.k


if __name__ == "__main__":
    iris = datasets.load_iris()  # load the data
    x = iris.data    # assign the features to x
    y = iris.target  # and the targets to y
    # Split the data into training and test sets
    x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.25)
    # Standardize the feature values of the training and test sets
    std = StandardScaler()
    x_train = std.fit_transform(x_train)
    x_test = std.transform(x_test)
    # Use the kNN classifier
    knn = KNNClassifier(4)
    knn.fit(x_train, y_train)
    # Predict on the test set
    predict = knn.predict(x_test)
    # Accuracy
    accuracy = knn.score(x_test, y_test)
    print("Accuracy:", accuracy)
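# The _predict method called in the score fragment above is not shown.
# A minimal sketch of a typical kNN single-sample prediction, assuming
# Euclidean distance and majority vote (names here are hypothetical):
import numpy as np
from collections import Counter

def knn_predict_one(x, x_train, y_train, k):
    """Hypothetical sketch: label x by majority vote of its k neighbors."""
    distances = np.linalg.norm(x_train - x, axis=1)  # distance to each sample
    nearest = np.argsort(distances)[:k]              # indices of k nearest
    votes = Counter(y_train[i] for i in nearest)     # count neighbor labels
    return votes.most_common(1)[0][0]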
            # (fragment of _predict: walk down the tree to a leaf)
            else:
                nd = nd.right
        return nd.score

    # Predict multiple samples
    def predict(self, X):
        return [self._predict(xi) for xi in X]


if __name__ == '__main__':
    print("Testing the accuracy of Regression Tree...")
    # Load the data
    boston = load_boston()
    X = list(boston.data)
    y = list(boston.target)
    X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=10)
    # Train model
    reg = RegressionTree()
    reg.fit(X=X_train, y=y_train, max_depth=4)
    # Show rules
    reg.print_rules()
    # Model accuracy
    get_r2(reg, X_test, y_test)
    # model_evaluation(reg, X_test, y_test)
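# The traversal fragment above assumes tree nodes with left/right children
# and a leaf score. A minimal sketch of such a node; the attribute names
# feature and split are assumptions about how internal nodes route samples:
class Node:
    """Hypothetical sketch of a regression-tree node."""
    def __init__(self, score=None):
        self.score = score   # leaf prediction (mean of targets in the leaf)
        self.left = None     # subtree for samples with feature < split
        self.right = None    # subtree for samples with feature >= split
        self.feature = None  # index of the splitting feature
        self.split = None    # threshold value of the split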
def main():
    # Load data
    X = np.loadtxt("data/input.txt")
    y = np.loadtxt("data/output.txt")
    # data = get_data("data.csv")
    # X, y = preprocessing(data)
    n_samples, n_features = X.shape
    X = min_max_scaler(X)
    # Hold out 10% for testing, then 10% of the remainder for validation
    X_train, X_test, y_train, y_test = train_test_split(X, y, 0.1, shuffle=True, seed=1000)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, 0.1, shuffle=True, seed=1000)
    validation_data = (X_val, y_val)

    # Candidate optimizers (only Adam_opt is used below)
    GD = GradientDescent(0.001)
    SGD = StochasticGradientDescent(learning_rate=0.001, momentum=0.9, nesterov=False)
    SGD_nes = StochasticGradientDescent(learning_rate=0.001, momentum=0.9, nesterov=True)
    Ada = Adagrad(learning_rate=0.001, epsilon=1e-6)
    Adad = Adadelta(rho=0.9, epsilon=1e-6)
    RMS = RMSProp(learning_rate=0.01)
    Adam_opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-6)
    # Note: the original script built Adamax_opt and NAdam_opt from the same
    # Adam class as placeholders, and defined NAdam_opt twice; the duplicate
    # definition is dropped here.
    Adamax_opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-6)
    NAdam_opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-6)

    # Build a fully connected network with two linear outputs
    model = Neural_Networks(optimizer=Adam_opt, loss=SquareLoss, validation_data=validation_data)
    model.add(Dense(200, input_shape=(n_features, )))
    model.add(Activation('sigmoid'))
    model.add(Dense(100))
    model.add(Activation('tanh'))
    model.add(Dense(100))
    model.add(Activation('sigmoid'))
    model.add(Dense(2))
    model.add(Activation('linear'))

    train_err, val_err = model.fit(X_train, y_train, n_epochs=500, batch_size=8)
    print(len(train_err))
    print(len(val_err))
    print("Training and validation errors", train_err[-1], val_err[-1])
    # Save the per-epoch training/validation error curves of the Adam run
    adam_err = np.concatenate(
        (np.array(train_err).reshape(-1, 1), np.array(val_err).reshape(-1, 1)),
        axis=1)
    np.savetxt("Adam_err.txt", adam_err, delimiter=',')

    y_train_pred = model.predict(X_train)
    print("===Training results===")
    # Column 0 is y1 and column 1 is y2, assuming predictions are shaped
    # (n_samples, 2) as the plots in the companion script imply; the original
    # indexed rows ([0], [1]) here and compared one training term against
    # y_test.
    print("R-square on y1", R_square(y_train_pred[:, 0], y_train[:, 0]))
    print("R-square on y2", R_square(y_train_pred[:, 1], y_train[:, 1]))
    print("Overall error on training set",
          (mean_squared_error(y_train_pred[:, 0], y_train[:, 0]) +
           mean_squared_error(y_train_pred[:, 1], y_train[:, 1])) / 2)

    y_val_pred = model.predict(X_val)
    print("===Validation results===")
    print("R-square on y1", R_square(y_val_pred[:, 0], y_val[:, 0]))
    print("R-square on y2", R_square(y_val_pred[:, 1], y_val[:, 1]))
    print("Overall error on validation set",
          (mean_squared_error(y_val_pred[:, 0], y_val[:, 0]) +
           mean_squared_error(y_val_pred[:, 1], y_val[:, 1])) / 2)

    y_pred = model.predict(X_test)
    print("===Testing results===")
    print("The portions of training is %0.2f , validation is %0.2f and testing data is %0.2f"
          % (len(y_train) / len(y) * 100, len(y_val) / len(y) * 100, len(y_test) / len(y) * 100))
    print("Result on blind test samples")
    print("R2 value on the y1", R_square(y_pred[:, 0], y_test[:, 0]))
    print("R2 value on the y2", R_square(y_pred[:, 1], y_test[:, 1]))
    print("Overall blind test error",
          (mean_squared_error(y_pred[:, 0], y_test[:, 0]) +
           mean_squared_error(y_pred[:, 1], y_test[:, 1])) / 2)
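# min_max_scaler and R_square above are repo-local helpers (also used by
# the next training script). Minimal sketches, assuming column-wise min-max
# scaling and a standard R^2; the actual helpers may differ:
import numpy as np

def min_max_scaler(X):
    """Hypothetical sketch: rescale each column of X to [0, 1]."""
    X = np.asarray(X, dtype=float)
    x_min, x_max = X.min(axis=0), X.max(axis=0)
    return (X - x_min) / (x_max - x_min + 1e-12)

def R_square(y_pred, y_true):
    """Hypothetical sketch: coefficient of determination."""
    y_pred = np.asarray(y_pred, dtype=float)
    y_true = np.asarray(y_true, dtype=float)
    sse = ((y_true - y_pred) ** 2).sum()
    sst = ((y_true - y_true.mean()) ** 2).sum()
    return 1 - sse / sst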
        return 1 / (1 + np.exp(-t))

    def __repr__(self):
        return "Logistic Regression"


if __name__ == '__main__':
    iris = datasets.load_iris()
    X = iris.data
    y = iris.target
    # Binary classification: iris has three classes, so keep only the first
    # two; for visualization, keep only the first two features
    X = X[y < 2, :2]
    y = y[y < 2]
    # Classify with logistic regression
    X_train, X_test, y_train, y_test = model_selection.train_test_split(X, y, seed=666)
    logistic_regression = LogisticRegression()
    logistic_regression.fit(X_train, y_train)
    # The two parameters of the decision boundary
    x1_plot = np.linspace(4, 8, 1000)
    x2_plot = (-logistic_regression.coef_[0] * x1_plot
               - logistic_regression.interception_) / logistic_regression.coef_[1]
    # Visualize the data and the decision boundary
    DecisionBoundary.plot_decision_boundary(logistic_regression, axis=[4, 7.5, 1.5, 4.5])
    plt.scatter(X[y == 0, 0], X[y == 0, 1], color='red')
    plt.scatter(X[y == 1, 0], X[y == 1, 1], color='blue')
    plt.show()
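# DecisionBoundary.plot_decision_boundary above is a repo-local plotting
# helper. A minimal sketch of the usual approach, assuming the model exposes
# a predict method over 2-D feature vectors (the function name and its body
# here are hypothetical):
import numpy as np
import matplotlib.pyplot as plt

def plot_decision_boundary(model, axis):
    """Hypothetical sketch: shade the plane by the model's predicted class."""
    x0, x1 = np.meshgrid(
        np.linspace(axis[0], axis[1], 200),
        np.linspace(axis[2], axis[3], 200))
    grid = np.c_[x0.ravel(), x1.ravel()]    # every point on the grid
    y_grid = np.array(model.predict(grid))  # prediction for each grid point
    zz = y_grid.reshape(x0.shape)
    plt.contourf(x0, x1, zz, alpha=0.3)     # shaded class regions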
def main():
    # Load data
    # X = np.loadtxt("data/input.txt")
    # y = np.loadtxt("data/output.txt")
    data = get_data("data.csv")
    X, y = preprocessing(data)
    n_samples, n_features = X.shape
    X = min_max_scaler(X)
    y = min_max_scaler(y)
    # Hold out 10% for testing, then 10% of the remainder for validation
    X_train, X_test, y_train, y_test = train_test_split(X, y, 0.1, shuffle=True, seed=1000)
    X_train, X_val, y_train, y_val = train_test_split(X_train, y_train, 0.1, shuffle=True, seed=1000)
    validation_data = (X_val, y_val)

    # Candidate optimizers (only Adam_opt is used below)
    GD = GradientDescent(0.001)
    SGD = StochasticGradientDescent(learning_rate=0.001, momentum=0.9, nesterov=True)
    Ada = Adagrad(learning_rate=0.001, epsilon=1e-6)
    Adad = Adadelta(rho=0.9, epsilon=1e-6)
    RMS = RMSProp(learning_rate=0.01)
    Adam_opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-6)
    # As above, Adamax_opt and NAdam_opt were placeholders built from Adam,
    # with NAdam_opt defined twice; the duplicate definition is dropped here.
    Adamax_opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-6)
    NAdam_opt = Adam(learning_rate=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-6)

    model = Neural_Networks(optimizer=Adam_opt, loss=SquareLoss, validation_data=validation_data)
    model.add(Dense(200, input_shape=(n_features, )))
    model.add(Activation('sigmoid'))
    model.add(Dense(100))
    model.add(Activation('sigmoid'))
    model.add(Dense(2))
    model.add(Activation('linear'))

    train_err, val_err = model.fit(X_train, y_train, n_epochs=1000, batch_size=256)

    y_train_pred = model.predict(X_train)
    print("===Training results===")
    # Column indexing as in the plots below, assuming predictions are shaped
    # (n_samples, 2); the original indexed rows ([0]) in these prints.
    print("R-square on y1", R_square(y_train_pred[:, 0], y_train[:, 0]))
    print("Overall error on training set", mean_squared_error(y_train_pred[:, 0], y_train[:, 0]) / 2)

    y_val_pred = model.predict(X_val)
    print("===Validation results===")
    print("R-square on y1", R_square(y_val_pred[:, 0], y_val[:, 0]))
    print("Overall error on validation set", mean_squared_error(y_val_pred[:, 0], y_val[:, 0]) / 2)

    y_pred = model.predict(X_test)
    print("===Testing results===")
    print("The portions of training is %0.2f , validation is %0.2f and testing data is %0.2f"
          % (len(y_train) / len(y) * 100, len(y_val) / len(y) * 100, len(y_test) / len(y) * 100))
    print("Result on blind test samples")
    print("R2 value on the y1", R_square(y_pred[:, 0], y_test[:, 0]))
    print("Overall blind test error", mean_squared_error(y_pred[:, 0], y_test[:, 0]) / 2)

    # Plot the training/validation error curves
    plt.plot(train_err, 'r', label="training")
    plt.plot(val_err, 'b', label='validation')
    plt.xlabel("Iterations")
    plt.ylabel("Error")
    plt.legend()
    plt.show()

    # Predicted vs. actual y1 on the blind test set
    plt.plot(np.arange(len(y_pred)), y_pred[:, 0], 'r', label='y1 predict')
    plt.plot(np.arange(len(y_pred)), y_test[:, 0], 'b', label='y1 actual')
    # plt.plot(np.arange(len(y_pred)), y_pred[:, 1], 'g', label='y2 predict')
    # plt.plot(np.arange(len(y_pred)), y_test[:, 1], 'k', label='y2 actual')
    plt.title("Result of blind test")
    plt.legend()
    plt.show()

    # Predicted vs. actual y1 on the training set
    plt.plot(np.arange(len(y_train_pred)), y_train_pred[:, 0], 'r', label='y1 training predict')
    plt.plot(np.arange(len(y_train)), y_train[:, 0], 'b', label='y1 training actual')
    # plt.plot(np.arange(len(y_train)), y_train_pred[:, 1], 'g', label='y2 training predict')
    # plt.plot(np.arange(len(y_train)), y_train[:, 1], 'k', label='y2 training actual')
    plt.title("Result of training set")
    plt.legend()
    plt.show()

    # Predicted vs. actual y1 on the validation set
    plt.plot(np.arange(len(y_val)), y_val_pred[:, 0], 'r', label='y1 val predict')
    plt.plot(np.arange(len(y_val)), y_val[:, 0], 'b', label='y1 val actual')
    # plt.plot(np.arange(len(y_val)), y_val_pred[:, 1], 'g', label='y2 val predict')
    # plt.plot(np.arange(len(y_val)), y_val[:, 1], 'k', label='y2 val actual')
    plt.title("Result of validation set")
    plt.legend()
    plt.show()
        self._w = self._GD(X, y_train, init_w, lr, n_iters, epsilon)
        self.intercept_ = self._w[0]
        self.coef_ = self._w[1:]
        return self

    def predict(self, X_predict):
        # Note: the bias column is appended last here, yet intercept_ above
        # is taken from _w[0]; both must match whatever convention _GD uses
        # when fitting.
        X = np.hstack([X_predict, np.ones((len(X_predict), 1))])
        return self._sigmoid(X.dot(self._w))


if __name__ == "__main__":
    from sklearn import datasets

    iris = datasets.load_iris()
    X = iris.data
    y = iris.target
    # Keep only the first two classes for binary classification
    X = X[y < 2, :]
    y = y[y < 2]
    X_train, X_test, y_train, y_test = ms.train_test_split(X, y, test_ratio=0.4)
    LRC = LogisticRegressionClassifier()
    LRC = LRC.fit(X_train, y_train, n_iters=100)
    predict = LRC.predict(X_test)
    print(predict)
    print(y_test)
    # Threshold the predicted probabilities at 0.5 and measure accuracy
    y_predict = predict > 0.5
    accuracy = np.sum(y_predict == y_test) / y_test.shape[0]
    print(accuracy)
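# _GD above is the classifier's internal batch gradient-descent routine.
# A minimal sketch of the update it presumably performs, assuming the mean
# cross-entropy loss of a sigmoid model (X already carries the bias column;
# the function name and stopping rule here are hypothetical):
import numpy as np

def gradient_descent(X, y, init_w, lr, n_iters, epsilon):
    """Hypothetical sketch: w <- w - lr * X^T(sigmoid(Xw) - y) / n."""
    w = np.asarray(init_w, dtype=float)
    for _ in range(n_iters):
        p = 1 / (1 + np.exp(-X.dot(w)))  # predicted probabilities
        grad = X.T.dot(p - y) / len(y)   # gradient of the mean log-loss
        if np.linalg.norm(lr * grad) < epsilon:
            break                        # step is tiny: treat as converged
        w -= lr * grad
    return w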