Example #1
# Imports this snippet relies on
import time

from sklearn.decomposition import KernelPCA as KPCA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import mean_squared_error


def runKPCA(ds):

    print("\n\nrunning Kernel PCA")

    # split the data
    (trainingX, testingX, trainingY, testingY) = dataTransform(ds)

    # begin timing
    startTime = time.time()

    # reduce the dimensions
    kpca = KPCA(n_components=2, kernel='rbf', gamma=15)
    trainingX = kpca.fit_transform(trainingX)
    testingX = kpca.transform(testingX)

    # run logistic regression
    lr = LogisticRegression(C=100.0, random_state=1)
    lr.fit(trainingX, trainingY)
    prediction = lr.predict(testingX)

    endTime = time.time()

    # show error (y_true first, then y_pred)
    print("Error score is: ", mean_squared_error(testingY, prediction))

    print("Runtime in seconds: ", endTime - startTime)
Example #2
# The original snippet starts mid-call; reconstructed from the variables
# used below (X and Y are assumed names for the cut-off inputs)
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y,
                                                    test_size=0.25,
                                                    random_state=0)

# feature scaling -> scale all values to the same range (currently, for
# example, salary and age are not on the same scale)
# not applied to Y_train and Y_test: dependent variables

from sklearn.preprocessing import StandardScaler
sc_X = StandardScaler()
X_train = sc_X.fit_transform(X_train)
X_test = sc_X.transform(X_test)  # transform only: reuse the training-set statistics rather than refitting on the test set
"""
Apply Kernal PCA
"""
from sklearn.decomposition import KernelPCA as KPCA
kpca = KPCA(
    n_components=2, kernel='rbf'
)  # intially it was None, then changed it to 2 , by checking the variance sum of first 2 values ( greater than 50)
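
# Sketch (an assumption, not part of the original snippet) of the
# variance check described in the comment above: fit KernelPCA with all
# components and inspect the kernel eigenvalues. scikit-learn >= 1.0
# exposes them as `eigenvalues_` (older releases used `lambdas_`).
probe = KPCA(kernel='rbf').fit(X_train)
ratio = probe.eigenvalues_ / probe.eigenvalues_.sum()
print("variance captured by first 2 components:", ratio[:2].sum())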
X_train = kpca.fit_transform(X_train)
X_test = kpca.transform(X_test)

# Fitting logistic regression to the training set
from sklearn.linear_model import LogisticRegression
classifier = LogisticRegression(random_state=0)
classifier.fit(X_train, Y_train)

# predicting the test set results
y_pred = classifier.predict(X_test)

# Making the confusion matrix
from sklearn.metrics import confusion_matrix
cm = confusion_matrix(Y_test, y_pred)
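
# Added sketch: overall accuracy can be read off the confusion matrix as
# the diagonal sum over the total count.
print("accuracy:", cm.trace() / cm.sum())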
Example #3
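# Imports the fragment below relies on (the original page shows only a
# fragment; these names match the imports shown in Example #4):
from sklearn.decomposition import PCA, KernelPCA as KPCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# `classifier` is a helper defined elsewhere in the original script; a
# minimal sketch of what it plausibly does (an assumption, not the
# original code): fit a model and report test accuracy.
def classifier(X_train, X_test, y_train, y_test):
    model = LogisticRegression()
    model.fit(X_train, y_train)
    print("accuracy:", accuracy_score(y_test, model.predict(X_test)))

# Baseline: the classifier on the original, unreduced features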
classifier(X_train, X_test, y_train, y_test)

# PCA decomposition
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(X_train)
X_test_pca = pca.transform(X_test)
classifier(X_train_pca, X_test_pca, y_train, y_test)

# LDA decomposition
lda = LDA(n_components=2)
X_train_lda = lda.fit_transform(X_train, y_train)
X_test_lda = lda.transform(X_test)
classifier(X_train_lda, X_test_lda, y_train, y_test)

# Kernel PCA decomposition
kpca = KPCA(n_components=2, kernel='rbf')
X_train_kpca = kpca.fit_transform(X_train)
X_test_kpca = kpca.transform(X_test)
classifier(X_train_kpca, X_test_kpca, y_train, y_test)

# sweep the RBF kernel width gamma
for i in [0.2, 0.4, 0.6, 0.8, 1]:
    kpca = KPCA(n_components=2, kernel='rbf', gamma=i)
    X_train_kpca = kpca.fit_transform(X_train)
    X_test_kpca = kpca.transform(X_test)
    print("gamma =", i)
    classifier(X_train_kpca, X_test_kpca, y_train, y_test)

print("My name is Guanhua Sun")
print("My NetID is: guanhua4")
print(
    "I hereby certify that I have read the University policy on Academic Integrity and that I am not in violation."
)
Example #4
import pandas as pd  # pd is used below (read_csv) but was missing from the imports
import numpy as np
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from mlxtend.plotting import plot_decision_regions
import matplotlib.pyplot as plt
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.decomposition import PCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.decomposition import KernelPCA as KPCA

pca = PCA(n_components=2)
lda = LDA(n_components=2)
kpca = KPCA(n_components=2, kernel='rbf')

C = 100
sc = StandardScaler()
df = pd.read_csv("./diamonds.csv")
df = df.fillna(0)
cut_mapping = {'Very Good': 0, 'Good': 1, 'Premium': 2, 'Ideal': 3, 'Fair': 4}
df['cut'] = df['cut'].map(cut_mapping)

color_mapping = {'D': 0, 'G': 1, 'F': 2, 'H': 3, 'J': 4, 'I': 5, 'E': 6}
df['color'] = df['color'].map(color_mapping)
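
# Added note: the LabelEncoder imported above could build such integer
# mappings automatically, though it orders categories alphabetically
# rather than by the hand-picked order used here, e.g.:
# df['color'] = LabelEncoder().fit_transform(df['color'])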

clarity_mapping = {
    'VVS2': 0,
    'VVS1': 1,
Example #5
if args.dimensionality_reduction_method == "PCA":
    pca = PCA(n_components=k, whiten=True)
    x_red[0] = pca.fit_transform(x[2])
    x_red[1] = pca.transform(x[3])
    x_red[2] = pca.transform(x[4])
    print("Dimensionality reduction method used: ", pca)

elif args.dimensionality_reduction_method == "LDA":
    lda = LDA(n_components=k)
    x_red[0] = lda.fit_transform(x[2], y[2])
    x_red[1] = lda.transform(x[3])
    x_red[2] = lda.transform(x[4])
    print("Dimensionality reduction method used: ", lda)

elif args.dimensionality_reduction_method == "KPCA":
    kpca = KPCA(n_components=k, kernel=args.kernel_pca)
    x_red[0] = kpca.fit_transform(x[2])
    x_red[1] = kpca.transform(x[3])
    x_red[2] = kpca.transform(x[4])
    print("Dimensionality reduction method used: ", kpca)

# training the model
if args.C is None:
    C = [0.5, 5, 10, 20]
else:
    C = [args.C]

if args.gamma is None:
    gam = [0.01, 0.05, 0.1, 0.5, 1]
else:
    gam = [args.gamma]
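
# The original snippet is cut off here. A sketch (an assumption, not the
# original code) of the grid search that the C and gam lists suggest,
# pairing x_red[0]/y[2] as training data and x_red[1]/y[3] as validation
# data, following the indexing convention above:
from sklearn.svm import SVC
for c in C:
    for g in gam:
        clf = SVC(C=c, gamma=g).fit(x_red[0], y[2])
        print("C =", c, "gamma =", g, "val acc:", clf.score(x_red[1], y[3]))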