def bagging(x_in, y_in, x_test_in, y_test_in):
    """Fit three different classifiers and combine them by majority vote.

    A logistic regression, a linear SVM and a CART decision tree are each
    fitted on ``(x_in, y_in)`` and asked to predict the test set.  For every
    test sample the vote is resolved pairwise, the voted label is compared
    with ``y_test_in`` and the resulting accuracy is printed as a percentage.

    Args:
        x_in: Training features.
        y_in: Training labels.
        x_test_in: Test features.
        y_test_in: Test labels; also forwarded to each ``predict`` call.

    Returns:
        Always ``0``; the accuracy is only reported on stdout.
    """
    # Train and predict one model after the other.
    logistic = LogisticRegression().fit(x_in, y_in)
    predict1 = logistic.predict(x_test_in, y_test_in)
    svm = LinearSVM().fit(x_in, y_in)
    predict2 = svm.predict(x_test_in, y_test_in)
    tree = CartDecisionTree().fit(x_in, y_in)
    predict3 = tree.predict(x_test_in, y_test_in)

    # Collect the votes sample by sample.
    n_test = np.size(y_test_in, axis=0)
    predict = np.zeros_like(predict1)
    count = 0
    for idx in range(n_test):
        first, second, third = predict1[idx], predict2[idx], predict3[idx]
        # The first model only wins when it disagrees with the second but
        # agrees with the third; in every other case the second model's
        # label is the one that gets recorded.
        if not (first == second) and (first == third):
            predict[idx] = first
        else:
            predict[idx] = second
        if predict[idx] == y_test_in[idx]:
            count += 1

    acc = count / n_test * 100
    print("Bagging ACC: %.2f%%" % acc)
    return 0
# Sweep the L1 penalty coefficient and plot the mean 5-fold accuracy for each
# value.  Folds are contiguous 20-row slices, so the held-out folds together
# cover rows 0..99 of X / y; every other row is used for training.
y = pd.Series(y)
result = []
x = []
n_folds = 5
fold_size = 20
for j in range(30):
    # One value drives both the model and the x-axis so the plot shows the
    # coefficient that was actually used for training.
    l1_coef = j * 2
    ans = []
    for fold in range(n_folds):
        start = fold * fold_size
        stop = (fold + 1) * fold_size
        # pd.concat replaces DataFrame.append / Series.append, which no
        # longer exist in pandas 2.x.
        X_train = pd.concat([X[0:start], X[stop:]])
        X_test = X[start:stop]
        y_train = pd.concat([y[0:start], y[stop:]])
        y_test = y[start:stop]
        clf = LogisticRegression(l1_coef=l1_coef).l1_fit(X_train, y_train)
        y_hat = clf.predict(X_test)
        y_t = list(y_test)
        # Count correct predictions without reusing the fold index.
        answer = sum(
            1 for k, actual in enumerate(y_t) if int(y_hat[k]) == actual
        )
        ans.append(answer / len(y_t))
    result.append(sum(ans) / len(ans))
    x.append(l1_coef)
# NOTE(review): the original indentation was lost; the per-fold accuracies of
# the last coefficient are assumed to be printed once, after the sweep.
print(ans)
print(result)
plt.plot(x, result)
plt.xlabel("Panelty Coefficient")
plt.ylabel("Accuracy")
plt.show()
from Logistic_Regression import LogisticRegression
from sklearn.datasets import load_breast_cancer
from sklearn.datasets import make_classification
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np

# Compare the L1- and L2-regularised fits on one synthetic train/test split.
X, y = make_classification(n_samples=1000)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.30, random_state=42
)

# Each entry: (headline, name of the fit method, name of the scoring method).
for headline, fit_name, score_name in (
    ("For L1 regularised Logistic Regression ", "l1_fit", "score1"),
    ("For L2 regularised Logistic Regression ", "l2_fit", "score2"),
):
    clf = getattr(LogisticRegression(), fit_name)(X_train, y_train)
    y_hat = clf.predict(X_test)
    ans = getattr(clf, score_name)(y_hat, y_test)
    print(headline)
    print(ans)
from Logistic_Regression import LogisticRegression
from sklearn.datasets import load_breast_cancer
import pandas as pd
import numpy as np

# Random toy problem: N samples with P standard-normal features and 0/1 labels.
N = 50
P = 8
X = pd.DataFrame(np.random.randn(N, P))
y = pd.Series(np.random.randint(0, 2, N))

# Fit with each training routine in turn and score it on the training data.
# Each entry: (headline, name of the fit method, name of the scoring method).
for headline, fit_name, score_name in (
    ("Accuracy with Gradient_Descent Normally", "fit", "score1"),
    ("Accuracy with Autograd Implementation", "fit_autograd", "score2"),
):
    clf = getattr(LogisticRegression(), fit_name)(X, y)
    y_hat = clf.predict(X)
    ans = getattr(clf, score_name)(y_hat, y)
    print(headline)
    print(ans)
from Logistic_Regression import LogisticRegression
from sklearn.datasets import load_breast_cancer
import pandas as pd
from sklearn.model_selection import KFold
import numpy as np

# Load the dataset once and take both the features and the target from the
# same object instead of calling the loader twice.
dataset = load_breast_cancer()
data = np.array(dataset.data)
y = np.array(dataset.target)

kf = KFold(n_splits=3)
# NOTE(review): the original indentation was lost; fitting, scoring and
# printing are assumed to happen once per fold.
for train_index, test_index in kf.split(data):
    # The model is handed pandas containers built from each fold's rows.
    X_train = pd.DataFrame(data[train_index])
    X_test = pd.DataFrame(data[test_index])
    y_train = pd.Series(y[train_index])
    y_test = pd.Series(y[test_index])

    clf = LogisticRegression().fit(X_train, y_train)
    y_hat = list(clf.predict(X_test))
    y_t = list(y_test)
    print("Overall Accuracy with K = 3 Folds")
    print(clf.score1(y_hat, y_t))
# Train a logistic-regression classifier on a scaled train/test split, build
# its confusion matrix and start drawing the decision regions over the
# training set.  X, y, np, plt and LogisticRegression come from outside this
# snippet.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.25, random_state = 0)

# Feature Scaling
# The scaler is fitted on the training split only and then reused for the
# test split.
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

# Fitting Logistic Regression to the Training set
classifier = LogisticRegression(lr=0.001)
classifier.fit(X_train, y_train)

# Predicting the Test set results
y_pred = np.array(classifier.predict(X_test))

# Making the Confusion Matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(y_test, y_pred)

# Visualising the Training set results
from matplotlib.colors import ListedColormap
X_set, y_set = X_train, y_train
# Grid with 0.01 spacing over the first two feature columns, padded by 1 on
# every side.
X1, X2 = np.meshgrid(np.arange(start = X_set[:, 0].min() - 1, stop = X_set[:, 0].max() + 1, step = 0.01),
                     np.arange(start = X_set[:, 1].min() - 1, stop = X_set[:, 1].max() + 1, step = 0.01))
# Classify every grid point and colour the plane by the predicted class.
plt.contourf(X1, X2, np.array(classifier.predict(np.array([X1.ravel(), X2.ravel()]).T)).reshape(X1.shape),
             alpha = 0.75, cmap = ListedColormap(('red', 'green')))
plt.xlim(X1.min(), X1.max())
plt.ylim(X2.min(), X2.max())
# NOTE(review): the body of this loop is not visible in this snippet.
for i, j in enumerate(np.unique(y_set)):
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from Logistic_Regression import LogisticRegression

# Binary problem on iris: first two feature columns, class 0 against the rest.
iris = datasets.load_iris()
X = iris.data[:, :2]
y = (iris.target != 0) * 1

clf = LogisticRegression()
clf.fit(X, y)
pred = clf.predict(X)

# Scatter the two classes in different colours.
plt.figure(figsize=(10, 6))
for label, colour in ((0, 'b'), (1, 'r')):
    plt.scatter(X[y == label][:, 0], X[y == label][:, 1],
                color=colour, label=str(label))
plt.legend()

# Evaluate the model on a grid spanning the data range and draw the 0.5
# probability contour as the decision boundary.
x1_min, x1_max = X[:, 0].min(), X[:, 0].max()
x2_min, x2_max = X[:, 1].min(), X[:, 1].max()
xx1, xx2 = np.meshgrid(np.linspace(x1_min, x1_max),
                       np.linspace(x2_min, x2_max))
grid = np.c_[xx1.ravel(), xx2.ravel()]
probs = clf.predict_prob(grid).reshape(xx1.shape)
plt.contour(xx1, xx2, probs, [0.5], linewidths=1, colors='black')
plt.show()
def adaboost(x_ada, y_ada, x_test_in, y_test_in):
    """Combine three fixed base classifiers with AdaBoost-style weighting.

    A logistic regression, a linear SVM and a CART decision tree are each
    fitted once on the training data.  For ``Adaboost_EPOCH`` rounds the
    classifier with the smallest weighted training error is selected, given
    the vote weight ``0.5 * log((1 - err) / err)``, and the sample weights
    are re-scaled and re-normalised.  The weighted vote of the selected
    classifiers is then compared with ``y_test_in`` and the accuracy is
    printed as a percentage.

    Labels and predictions are assumed to be 0/1 valued; they are mapped to
    -1/+1 through ``(v - 0.5) * 2``.

    Args:
        x_ada: Training features.
        y_ada: Training labels; also forwarded to ``predict``.
        x_test_in: Test features.
        y_test_in: Test labels; also forwarded to ``predict``.

    Returns:
        Always ``0``; the accuracy is only reported on stdout.
    """
    n_train = np.size(x_ada, axis=0)

    # Start from uniform sample weights, kept as a column vector.
    weight = np.ones((n_train, 1))
    weight /= n_train

    # Train every base classifier once and keep its training predictions.
    # Labels and predictions are flattened so the weight arithmetic below
    # works for both (n,) and (n, 1) shaped inputs.
    targets = np.ravel(y_ada)
    classifiers = []
    mistakes = []
    margins = []
    for model_cls in (LogisticRegression, LinearSVM, CartDecisionTree):
        model = model_cls().fit(x_ada, y_ada)
        train_pred = np.ravel(model.predict(x_ada, y_ada))
        classifiers.append(model)
        mistakes.append(train_pred != targets)
        # +1 where prediction and label agree, -1 where they differ (0/1 data).
        margins.append((targets - 0.5) * (train_pred - 0.5) * 4)

    # Keeps the vote weight finite when a classifier is perfect (error 0) or
    # always wrong (error 1).
    eps = 1e-10
    weight_list = []
    classifier_list = []
    for _ in range(Adaboost_EPOCH):
        # Weighted training error of each classifier; a classifier without
        # mistakes simply gets 0.0.
        errors = [float(np.sum(weight[wrong])) for wrong in mistakes]

        # Smallest error wins; ties go to the earlier classifier.
        best = int(np.argmin(errors))
        err = min(max(errors[best], eps), 1 - eps)
        a = 1 / 2 * np.log((1 - err) / err)

        # Re-weight the samples and normalise so the weights sum to one.
        weight = weight * np.exp(-a * margins[best]).reshape(-1, 1)
        weight = weight / np.sum(weight)

        weight_list.append(a)
        classifier_list.append(classifiers[best])

    # Evaluate the weighted vote on the test set.
    n_test = np.size(y_test_in, axis=0)
    predict_sum = np.zeros(n_test)
    for a, model in zip(weight_list, classifier_list):
        test_pred = np.ravel(model.predict(x_test_in, y_test_in))
        predict_sum += a * (test_pred - 0.5) * 2

    # A non-negative vote total means class 1, otherwise class 0.
    predict_get = np.where(predict_sum >= 0, 1, 0)
    acc_count = int(np.sum(predict_get == np.ravel(y_test_in)))
    acc = acc_count / n_test * 100
    print('Adaboost ACC: %.2f%%' % acc)
    return 0
# Project both splits with the PCA fitted on the training data.
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)

# Last metric of every run on the test / training split, and the swept values.
f_measure_test, f_measure_train, Lambda = [], [], []

# Training logistic regression classifier with L2 penalty
for i in float_range(-2, -0.2, 0.2):
    # NOTE(review): C_ is computed but never used; the model receives C=i.
    C_ = 1 / i
    LR = LogisticRegression(learningRate=0.1, numEpochs=10,
                            penalty='L2', C=i)
    LR.train(X_train_pca, y_train, tol=10**-3)
    # Optional diagnostic: LR.plotCost()

    # Score the fitted model on both splits with a 50% cutoff probability.
    predictions, probs = LR.predict(X_test_pca, 0.5)
    performance = LR.performanceEval(predictions, y_test)
    predictions_train, probs_train = LR.predict(X_train_pca, 0.5)
    performance_train = LR.performanceEval(predictions_train, y_train)
    # Optional diagnostics:
    #   LR.plotDecisionRegions(X_test_pca, y_test)
    #   LR.predictionPlot(X_test_pca, y_test)

    # Print out the test-set performance values.
    for key, value in performance.items():
        print('%s : %.2f' % (key, value))
    print("\n")

    # Keep the last reported metric of each split for this setting.
    f_measure_test.append(list(performance.values())[-1])
    f_measure_train.append(list(performance_train.values())[-1])
    Lambda.append(i)