return scores def plotAccuracy(accuracy, pred, title): fig = plt.figure(figsize=(10, 4), tight_layout=True) ax = fig.add_subplot(1, 1, 1) plt.plot(k, accuracy) ax.set_xlabel("Predictors") ax.set_ylabel("Accuracy") ax.set_title(title, fontsize=12) plt.show() if __name__ == '__main__': X, Y = get_training() num_pred_list = [3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 30] alpha_list = [0, .125, .25, .375, .5, .625, .75, .875, 1] subsets = generate_subsets() ten_scores_subsets = getScores(X, Y, subsets, 10) loocv_scores_subsets = getScores(X, Y, subsets, len(X)) ten_scores_pca = getScores_pca(X, Y, num_pred_list, 10) loocv_scores_pca = getScores_pca(X, Y, num_pred_list, len(X)) ten_scores_lasso = getScores_lasso(X, Y, alpha_list, 10) loocv_scores_lasso = getScores_lasso(X, Y, alpha_list, len(X)) print("_______________________Subsets______________________________") for i in range(len(subsets)):
"""Helper function that gets full set of results

Returns:
    Returns a dictionary of results as follows:
    {
        'accuracy' : [list of accuracies],
        'recall': [list of recall results],
        'precision': [list of precision results]
    }
"""
return self.results


# Script entry point: for each candidate PCA component count, update the
# Validation object and record the accuracy it reports for an LDA model.
if __name__ == '__main__':
    from data_utils import get_training
    from sklearn.discriminant_analysis import LinearDiscriminantAnalysis

    X, y = get_training()
    mdl = LinearDiscriminantAnalysis()
    val = Validation(X, y)
    # PCA settings to try, passed one at a time to val.update(pca=...).
    pca_comp_to_test = [ 1, 3, 5, 10, 30]
    # One accuracy result per entry of pca_comp_to_test, in the same order.
    results = []
    # NOTE(review): not read in the visible code; presumably tracks the best
    # result seen and its setting -- confirm the tuple layout where it is used.
    best_so_far = (0, -1)
    # common validation pattern
    for d in pca_comp_to_test:
        # update hyper params for preprocessing
        val.update(pca=d)
        # generally change model params
        acc = val.cross_val_accuracy(mdl)
        results.append(acc)
import matplotlib.pyplot as plt
from data_utils import get_training, get_testing
from sklearn.linear_model import LogisticRegression
from sklearn.decomposition import PCA
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score
from my_pca import my_pca


def best_logistic_regression():
    """Placeholder; currently does nothing and returns None."""
    pass


# Script entry point: build a list of labelled LogisticRegression
# configurations and cross-validate each one on the training data.
if __name__ == '__main__':
    # Load data
    X_train, y_train = get_training()
    N, D = X_train.shape
    # Logistic Regression
    # Each entry is (label, estimator); the label names the penalty and C.
    # NOTE(review): on recent scikit-learn, penalty='l1' needs a solver that
    # supports it (e.g. solver='liblinear') -- confirm against the installed
    # version.
    models = [('L2, C=1', LogisticRegression()),
              ('L2, C=10', LogisticRegression(C=10)),
              ('L2, C=100', LogisticRegression(C=100)),
              ('L2, C=1000', LogisticRegression(C=1000)),
              ('L1, C=1', LogisticRegression(penalty='l1')),
              ('L1, C=10', LogisticRegression(penalty='l1', C=10)),
              ('L1, C=100', LogisticRegression(penalty='l1', C=100)),
              ('L1, C=1000', LogisticRegression(penalty='l1', C=1000))]
    # NOTE(review): Python 2 print statement; this does not parse on Python 3.
    print 'Without PCA'
    for name, model in models:
        scores = cross_val_score(model,