def runKPCA(ds):
    """Project *ds* onto two RBF Kernel-PCA components, fit a logistic
    regression on the reduced features, and report test MSE and runtime.

    Relies on module-level ``dataTransform``, ``KPCA``,
    ``LogisticRegression``, ``mean_squared_error`` and ``time``.
    """
    print("\n\nrunning Kernel PCA")

    # Split the dataset into train/test features and labels.
    train_x, test_x, train_y, test_y = dataTransform(ds)

    t_start = time.time()

    # Non-linear dimensionality reduction down to 2 components.
    reducer = KPCA(n_components=2, kernel='rbf', gamma=15)
    train_x = reducer.fit_transform(train_x)
    test_x = reducer.transform(test_x)

    # Classify in the reduced space.
    model = LogisticRegression(C=100.0, random_state=1)
    model.fit(train_x, train_y)
    guesses = model.predict(test_x)

    t_end = time.time()

    # Report error (MSE is symmetric, so the argument order is harmless).
    print("Error score is: ", mean_squared_error(guesses, test_y))
    print("Runtime in seconds: ", t_end - t_start)
test_size=0.25, random_state=0) #feature scaling -> scale all values to same range(currently salary and age are not in same range for example) # not applied to Y_train and Y_test : depdendent variables from sklearn.preprocessing import StandardScaler sc_X = StandardScaler() X_train = sc_X.fit_transform(X_train) X_test = sc_X.fit_transform(X_test) """ Apply Kernal PCA """ from sklearn.decomposition import KernelPCA as KPCA kpca = KPCA( n_components=2, kernel='rbf' ) # intially it was None, then changed it to 2 , by checking the variance sum of first 2 values ( greater than 50) X_train = kpca.fit_transform(X_train) X_test = kpca.transform(X_test) # Fitting Logistic regression to training set from sklearn.linear_model import LogisticRegression classifier = LogisticRegression(random_state=0) classifier.fit(X_train, Y_train) # predicting the test set results y_pred = classifier.predict(X_test) # Making the confusion matrix from sklearn.metrics import confusion_matrix cm = confusion_matrix(
classifier(X_train, X_test, y_train, y_test) #PCA decomponent pca = PCA(n_components=2) X_train_pca = pca.fit_transform(X_train) X_test_pca = pca.transform(X_test) classifier(X_train_pca, X_test_pca, y_train, y_test) #LDA decomponent lda = LDA(n_components=2) X_train_lda = lda.fit_transform(X_train, y_train) X_test_lda = lda.transform(X_test) classifier(X_train_lda, X_test_lda, y_train, y_test) #kpca decomponent kpca = KPCA(n_components=2, kernel='rbf') X_train_kpca = kpca.fit_transform(X_train) X_test_kpca = kpca.transform(X_test) classifier(X_train_kpca, X_test_kpca, y_train, y_test) for i in [0.2, 0.4, 0.6, 0.8, 1]: kpca = KPCA(n_components=2, kernel='rbf', gamma=i) X_train_kpca = kpca.fit_transform(X_train) X_test_kpca = kpca.transform(X_test) print(i) classifier(X_train_kpca, X_test_kpca, y_train, y_test) print("My name is Guanhua Sun") print("My NetID is: guanhua4") print( "I hereby certify that I have read the University policy on Academic Integrity and that I am not in violation."
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.decomposition import KernelPCA as KPCA

# Dimensionality-reduction models, each configured for 2 output components.
pca = PCA(n_components=2)
lda = LDA(n_components=2)
kpca = KPCA(n_components=2, kernel='rbf')

# presumably the regularization parameter C for the classifiers below
# (SVC / LogisticRegression are imported) — TODO confirm against later use.
C = 100
sc = StandardScaler()

# df = pd.read_csv("./diamonds.csv")
# NOTE(review): the read_csv above is commented out and pandas is not
# imported here, so `df` must be defined earlier/elsewhere or the fillna
# below raises NameError — confirm where `df` comes from.
df = df.fillna(0)

# Encode the categorical string columns as integer codes.
cut_mapping = {'Very Good': 0, 'Good': 1, 'Premium': 2, 'Ideal': 3, 'Fair': 4}
df['cut'] = df['cut'].map(cut_mapping)
color_mapping = {'D': 0, 'G': 1, 'F': 2, 'H': 3, 'J': 4, 'I': 5, 'E': 6}
df['color'] = df['color'].map(color_mapping)
# NOTE: this dict literal is cut off at the chunk boundary; the remaining
# entries and closing brace continue past the visible source.
clarity_mapping = {
    'VVS2': 0,
    'VVS1': 1,
# Dimensionality reduction: fit the method selected on the command line on
# the training split (x[2]) and apply that same fitted transform to the
# dev (x[3]) and test (x[4]) splits, storing results in x_red[0..2].
# The three options are mutually exclusive, so an if/elif chain replaces
# the original three independent `if` statements.
if args.dimensionality_reduction_method == "PCA":
    pca = PCA(n_components=k, whiten=True)
    x_red[0] = pca.fit_transform(x[2])
    x_red[1] = pca.transform(x[3])
    x_red[2] = pca.transform(x[4])
    # FIX: corrected typos in the user-facing message
    # ("Dimenionality ... methd" -> "Dimensionality ... method").
    print("Dimensionality reduction method used: ", pca)
elif args.dimensionality_reduction_method == "LDA":
    # LDA is supervised: the fit also needs the training labels y[2].
    lda = LDA(n_components=k)
    x_red[0] = lda.fit_transform(x[2], y[2])
    x_red[1] = lda.transform(x[3])
    x_red[2] = lda.transform(x[4])
    print("Dimensionality reduction method used: ", lda)
elif args.dimensionality_reduction_method == "KPCA":
    kpca = KPCA(n_components=k, kernel=args.kernel_pca)
    x_red[0] = kpca.fit_transform(x[2])
    x_red[1] = kpca.transform(x[3])
    x_red[2] = kpca.transform(x[4])
    print("Dimensionality reduction method used: ", kpca)

# Hyper-parameter grids for training: fall back to a small sweep when the
# user did not pin C / gamma on the command line.
# FIX: compare to None with `is`, not `==` (PEP 8; avoids surprises with
# custom __eq__ implementations).
if args.C is None:
    C = [0.5, 5, 10, 20]
else:
    C = [args.C]
if args.gamma is None:
    gam = [0.01, 0.05, 0.1, 0.5, 1]
else:
    gam = [args.gamma]