# Random forest classifier on the synth7 data (bagged decision trees)
import numpy as np
from scipy.io import loadmat
from matplotlib.pyplot import figure, show
from sklearn.ensemble import RandomForestClassifier
# dbplot/dbprobplot come from the course toolbox (import path assumed)
from toolbox_02450 import dbplot, dbprobplot

# Load Matlab data file and extract variables of interest
mat_data = loadmat('../Data/synth7.mat')
X = np.matrix(mat_data['X'])
y = np.matrix(mat_data['y'])
attributeNames = [name[0] for name in mat_data['attributeNames'].squeeze()]
classNames = [name[0][0] for name in mat_data['classNames']]
N, M = X.shape
C = len(classNames)

# Number of trees in the forest (i.e. rounds of bagging)
L = 100

# Fit random forest classifier
rf_classifier = RandomForestClassifier(n_estimators=L)
rf_classifier.fit(X.A, y.A.ravel())
y_est = np.mat(rf_classifier.predict(X.A)).T
y_est_prob = rf_classifier.predict_proba(X.A).T

# Compute classification error on the training data
ErrorRate = (y != y_est).sum(dtype=float) / N
print('Error rate: {:.2f}%'.format(ErrorRate * 100))

# Plot decision boundaries
figure(1)
dbprobplot(rf_classifier, X, y, 'auto', resolution=400)
figure(2)
dbplot(rf_classifier, X, y, 'auto', resolution=400)
show()
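# --- Added sketch (not in the original script): the error rate above is
# measured on the same data the forest was trained on, so it is optimistic.
# scikit-learn can report an out-of-bag (OOB) estimate instead, scoring each
# tree on the bootstrap samples it never saw during training.
rf_oob = RandomForestClassifier(n_estimators=L, oob_score=True)
rf_oob.fit(X.A, y.A.ravel())
print('OOB accuracy estimate: {:.2f}%'.format(rf_oob.oob_score_ * 100))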
# One-vs-rest and multinomial logistic regression on a train/test split
import numpy as np
from scipy.io import loadmat
from matplotlib.pyplot import figure, show
from sklearn.linear_model import LogisticRegression
# dbplot comes from the course toolbox (import path assumed)
from toolbox_02450 import dbplot

# Load Matlab data file and extract variables of interest
# (the original snippet omitted the loadmat call; the file name is assumed)
mat_data = loadmat('../Data/synth7.mat')
X_train = np.matrix(mat_data['X_train'])
X_test = np.matrix(mat_data['X_test'])
y = np.matrix(mat_data['y'])
y_train = np.matrix(mat_data['y_train'])
y_test = np.matrix(mat_data['y_test'])
attributeNames = [name[0] for name in mat_data['attributeNames'].squeeze()]
classNames = [name[0][0] for name in mat_data['classNames']]
N, M = X_train.shape  # the original referenced an undefined X here
C = len(classNames)

# Fit and plot one-vs-rest classifiers
y_test_est = np.mat(np.zeros((y_test.shape[0], C)))
for c in range(C):
    logit_classifier = LogisticRegression()
    logit_classifier.fit(X_train, (y_train == c).astype(int).A.ravel())
    y_test_est[:, c] = np.mat(logit_classifier.predict(X_test)).T
    figure(c + 1)
    dbplot(logit_classifier, X_test, (y_test == c).astype(int), 'auto')

# Plot results for multinomial fit (softmax)
figure(C + 1)
logit_classifier = LogisticRegression()
logit_classifier.fit(X_train, y_train.A.ravel())
dbplot(logit_classifier, X_test, y_test, 'auto')

# Compute test error rate of the multinomial model
# (the original print mislabeled this as an ensemble error)
y_test_est_multinomial = np.mat(logit_classifier.predict(X_test)).T
ErrorRate = (y_test != y_test_est_multinomial).sum(dtype=float) / y_test.shape[0]
show()
print('Error rate (multinomial): {:.2f}%'.format(100 * ErrorRate))
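# --- Added sketch (not in the original script): the loop above stores each
# one-vs-rest classifier's hard 0/1 predictions in y_test_est but never
# combines them into a single multi-class decision. A standard way is to keep
# each classifier's probability for its positive class and assign every test
# point to the class with the highest score.
ovr_scores = np.zeros((y_test.shape[0], C))
for c in range(C):
    clf = LogisticRegression()
    clf.fit(X_train, (y_train == c).astype(int).A.ravel())
    ovr_scores[:, c] = clf.predict_proba(X_test.A)[:, 1]  # P(class == c)
y_test_ovr = np.mat(ovr_scores.argmax(axis=1)).T
ovr_error = (y_test != y_test_ovr).sum(dtype=float) / y_test.shape[0]
print('Error rate (one-vs-rest): {:.2f}%'.format(100 * ovr_error))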
# Bagging: fit L logistic regression models on bootstrap samples of the data
# and combine them by majority voting (exercise 9.2.1)
import numpy as np
from scipy.io import loadmat
from matplotlib.pyplot import figure, show
from sklearn.linear_model import LogisticRegression
# bootstrap, BinClassifierEnsemble, dbplot and dbprobplot come from the
# course toolbox (import path assumed)
from toolbox_02450 import bootstrap, BinClassifierEnsemble, dbplot, dbprobplot

# Load Matlab data file and extract variables of interest
# (the original snippet omitted the setup; data and L as in the scripts above)
mat_data = loadmat('../Data/synth7.mat')
X = np.matrix(mat_data['X'])
y = np.matrix(mat_data['y'])
N, M = X.shape

# Number of rounds of bagging
L = 100

# Uniform sampling weights for the bootstrap (assumed; the original snippet
# used `weights` without defining it)
weights = np.ones((N, 1), dtype=float) / N

logits = [0] * L
votes = np.zeros((N, 1))

# For each round of bagging
for l in range(L):
    # Extract training set by random sampling with replacement from X and y
    X_train, y_train = bootstrap(X, y, N, weights)

    # Fit logistic regression model to training data and save result
    logit_classifier = LogisticRegression()
    logit_classifier.fit(X_train, y_train.A.ravel())
    logits[l] = logit_classifier
    y_est = np.mat(logit_classifier.predict(X)).T
    votes = votes + y_est

    ErrorRate = (y != y_est).sum(dtype=float) / N
    print('Error rate: {:.2f}%'.format(ErrorRate * 100))

# Estimated value of class labels (using 0.5 as threshold) by majority voting
y_est_ensemble = votes > (L / 2)

# Compute error rate of the ensemble
ErrorRate = (y != y_est_ensemble).sum(dtype=float) / N
print('Error rate (ensemble): {:.1f}%'.format(ErrorRate * 100))

# Plot decision boundaries of the combined classifier
ce = BinClassifierEnsemble(logits)
figure(1)
dbprobplot(ce, X, y, 'auto', resolution=200)
figure(2)
dbplot(ce, X, y, 'auto', resolution=200)
show()

print('Ran Exercise 9.2.1')
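# --- Added sketch (not in the original script): hard majority voting above
# counts 0/1 predictions. The same bag of models can instead be combined by
# averaging each model's predicted probability of class 1 ("soft voting")
# and thresholding the mean at 0.5.
probs = np.zeros((N, 1))
for clf in logits:
    probs = probs + np.mat(clf.predict_proba(X.A)[:, 1]).T
y_est_soft = (probs / L) > 0.5
soft_error = (y != y_est_soft).sum(dtype=float) / N
print('Error rate (soft voting): {:.2f}%'.format(soft_error * 100))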