示例#1
0
文件: HW4_wei.py 项目: wjiang16/CMM
# `tune_parameter` is defined outside this excerpt; its return value (if any)
# is not used here.
tune_parameter()

# Standardize both splits with a scaler fitted on the training data only.
st = StandardScaler()
st.fit(df_train)
df_train, df_test = st.transform(df_train), st.transform(df_test)

# Classifier plus a 2-component PCA so the model can be drawn in 2-D.
# (An RBF-kernel SVC with C=1, gamma=0.001, class_weight='balanced' and
# probability=True was previously kept here as a commented-out alternative.)
lr = LogisticRegression(C=0.06)
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(df_train)
X_test_pca = pca.transform(df_test)

lr.fit(X_train_pca, y_train)

# Decision regions of the PCA-space model, drawn over the training points.
plot_decision_regions(X_train_pca, y_train, classifier=lr)
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend(loc='lower left')
plt.savefig('PCA_prediction.jpg')
plt.show()

# Fresh figure for the ROC curve of the test data.
fig = plt.figure(figsize=(7, 5))
all_tpr = []

# Refit the same estimator on the full standardized feature set (not the PCA
# projection), then score the test split; column 1 of the probability matrix
# is passed to roc_curve with pos_label=1.
lr.fit(df_train, y_train)
probas = lr.predict_proba(df_test)
fpr, tpr, thresholds = roc_curve(y_test, probas[:, 1], pos_label=1)

roc_auc = auc(fpr, tpr)
# Decision-tree demo on the Iris data set: fit a depth-3 entropy tree on the
# petal length / petal width columns, plot its decision regions and print the
# accuracy on the held-out split.
from sklearn.tree import DecisionTreeClassifier
from sklearn import datasets
try:
    # `sklearn.cross_validation` was removed in scikit-learn 0.20; the
    # function now lives in `sklearn.model_selection`.
    from sklearn.model_selection import train_test_split
except ImportError:
    # Very old scikit-learn releases (< 0.18) only ship the legacy module.
    from sklearn.cross_validation import train_test_split
from plot_decision_region import plot_decision_regions
import numpy as np
import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score



iris = datasets.load_iris()
# Columns 2 and 3 are the two features plotted below (petal length, petal width).
X = iris.data[:, [2, 3]]
y = iris.target

# 70 % / 30 % split with a fixed seed so the run is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=0)

# Training rows first, test rows last, so the test samples occupy the tail of
# the combined arrays.
X_combined = np.vstack((X_train, X_test))
y_combined = np.hstack((y_train, y_test))

tree = DecisionTreeClassifier(criterion='entropy', max_depth=3, random_state=0)
tree.fit(X_train, y_train)

# Highlight the test samples. The index range is derived from the split sizes
# instead of the hard-coded range(105, 150), so it stays correct if the data
# set or test_size changes (for 150 rows and test_size=0.3 it is identical).
plot_decision_regions(X_combined, y_combined, classifier=tree,
                      test_idx=range(len(X_train), len(X_combined)))
plt.xlabel('petal length [cm]')
plt.ylabel('petal width [cm]')
plt.legend(loc='upper left')
plt.show()

y_pred = tree.predict(X_test)
print("accuurecy :%.3f"%  accuracy_score(y_test, y_pred))
示例#3
0
# `tune_parameter` is defined outside this excerpt; its return value (if any)
# is not used here.
tune_parameter()

# Standardize both splits with a scaler fitted on the training data only.
st = StandardScaler()
st.fit(df_train)
df_train, df_test = st.transform(df_train), st.transform(df_test)

# Classifier plus a 2-component PCA so the model can be drawn in 2-D.
# (An RBF-kernel SVC with C=1, gamma=0.001, class_weight='balanced' and
# probability=True was previously kept here as a commented-out alternative.)
lr = LogisticRegression(C=0.06)
pca = PCA(n_components=2)
X_train_pca = pca.fit_transform(df_train)
X_test_pca = pca.transform(df_test)

lr.fit(X_train_pca, y_train)

# Decision regions of the PCA-space model, drawn over the training points.
plot_decision_regions(X_train_pca, y_train, classifier=lr)
plt.xlabel('PC1')
plt.ylabel('PC2')
plt.legend(loc='lower left')
plt.savefig('PCA_prediction.jpg')
plt.show()

# Fresh figure for the ROC curve of the test data.
fig = plt.figure(figsize=(7, 5))
all_tpr = []

# Refit the same estimator on the full standardized feature set (not the PCA
# projection), then score the test split; column 1 of the probability matrix
# is passed to roc_curve with pos_label=1.
lr.fit(df_train, y_train)
probas = lr.predict_proba(df_test)
fpr, tpr, thresholds = roc_curve(y_test, probas[:, 1], pos_label=1)

roc_auc = auc(fpr, tpr)
示例#4
0
        ## Keep the current candidate as the running best when element [1] of
        ## tmp_max exceeds that of the stored temp_max (presumably an accuracy
        ## score -- confirm against the code that builds tmp_max). The feature
        ## pair [ival, jval] that produced it is remembered alongside.
        if tmp_max[1] > temp_max[1]:
            temp_index = tmp_index
            temp_max = tmp_max
            temp_max_factor = [ival, jval]

        ## Plot the decision regions for the specific feature pair that an earlier
        ## run of this script identified as the best one.
        ## Earlier variant of the condition, with a different pair per test case:
        ##if ([ival,jval] == [2,11] and testcase == "2") or ([ival,jval] == [11,12] and testcase != "2"):
        ## NOTE(review): both sides of the `or` now test the same pair [7, 9], so
        ## for a plain string `testcase` the condition reduces to
        ## [ival, jval] == [7, 9]; the two-sided shape looks like a template
        ## kept so each test case can get its own pair again.
        if ([ival, jval] == [7, 9]
                and testcase == "2") or ([ival, jval] == [7, 9]
                                         and testcase != "2"):
            ## Print every entry of Accuracy together with its index.
            for h in range(len(Accuracy)):
                print("Accuracy of [7,9] for method", h, "is :", Accuracy[h])
            ## Stack train rows first and test rows last so the test samples sit
            ## at the tail of the combined arrays.
            X_combined_std = np.vstack((X_train_std, X_test_std))
            y_combined = np.hstack((y_train, y_test))
            ## The classifier is picked from ppn by tmp_index. test_idx is
            ## hard-coded to 105..149 -- assumes 105 training rows and 150 rows
            ## in total; TODO confirm against the split used by the caller.
            plot_decision_regions(X=X_combined_std,
                                  y=y_combined,
                                  classifier=ppn[tmp_index],
                                  test_idx=range(105, 150))
            ##plot_decision_regions(X=X_combined_std, y=y_combined,classifier=ppn[1], test_idx=range(105,150))## Plot linear regression decision graph
            plt.title('Plot of Prediction of the Best Accuracy')
            ## NOTE(review): both branches currently set identical axis labels;
            ## the commented-out lines show the labels used for the earlier
            ## per-testcase feature pairs.
            if testcase == "2":
                plt.xlabel('Maximum Heartrate Achieved')
                plt.ylabel('ST Depression Induced by Exercise Related to Rest')
                ##plt.xlabel('Chest Pain Type')
                ##plt.ylabel('Number of Major Vessels Colored by Flourosopy')
            else:
                plt.xlabel('Maximum Heartrate Achieved')
                plt.ylabel('ST Depression Induced by Exercise Related to Rest')
                ##plt.xlabel('Number of Major Vessels Colored by Flourosopy')
                ##plt.ylabel('Thal')

            plt.legend(loc='upper left')
示例#5
0
    header=None)
# Two-class perceptron demo on the first 100 rows of `df` (loaded above).
# Column 4 holds the class name: 'Iris-setosa' becomes -1, anything else 1.
y = df.iloc[0:100, 4].values
y = np.where(y == 'Iris-setosa', -1, 1)
# Feature columns 0 and 2; the decision-region plot below labels them
# sepal length (x) and petal length (y).
X = df.iloc[0:100, [0, 2]].values
print(X, y)

# Raw data scatter. The slices assume rows 0-49 are setosa and rows 50-99 are
# versicolor, as the legend labels imply.
plt.scatter(X[:50, 0], X[:50, 1], color='red', marker='o', label='Setosa')
plt.scatter(X[50:100, 0],
            X[50:100, 1],
            color='blue',
            marker='x',
            label='Versicolor')
# X[:, 0] is on the x-axis and X[:, 1] on the y-axis. The labels were
# previously swapped ('Petal length' on x, 'sepal length' on y), which
# contradicted the decision-region plot of the same X further down.
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.legend(loc='upper left')
plt.show()

# Train the perceptron and plot the per-epoch error counts it recorded.
ppn = Perceptron(eta=0.001, n_iter=10)
ppn.fit(X, y)
print(ppn.errors_)
plt.plot(range(1, len(ppn.errors_) + 1), ppn.errors_, marker='o')
plt.xlabel('epochs')
plt.ylabel('Number of misclassifications')
plt.show()

# Decision regions of the trained perceptron over the same two features.
plot_decision_regions(X, y, classifier=ppn)
plt.xlabel('sepal length [cm]')
plt.ylabel('petal length [cm]')
plt.legend(loc='upper left')
plt.show()
示例#6
0
# Raw data scatter. `X` and `y` are defined earlier in the file (not visible
# in this excerpt); the slices assume rows 0-49 are setosa and rows 50-99 are
# versicolor, as the legend labels imply.
plt.scatter(X[:50, 0], X[:50, 1], color='red', marker='o', label='Setosa')
plt.scatter(X[50:100, 0],
            X[50:100, 1],
            color='blue',
            marker='x',
            label='Versicolor')
# X[:, 0] is on the x-axis and X[:, 1] on the y-axis. The labels were
# previously swapped ('Petal length' on x, 'sepal length' on y), which
# contradicted the standardized plot below, where column 0 is labelled sepal
# length and column 1 petal length.
plt.xlabel('Sepal length')
plt.ylabel('Petal length')
plt.legend(loc='upper left')
plt.show()

### Standardization
# Scale each of the two feature columns to zero mean and unit standard
# deviation, writing into a copy so the raw X stays untouched.
X_std = np.copy(X)
for col in (0, 1):
    X_std[:, col] = (X[:, col] - X[:, col].mean()) / X[:, col].std()

# Fit Adaline with stochastic gradient descent on the standardized features
# and draw its decision regions.
ada = adalineSGD(n_iter=15, eta=0.01, random_state=1)
ada.fit(X_std, y)
plot_decision_regions(X_std, y, classifier=ada)
plt.title('Adaline - Stochastic Gradient descent')
plt.xlabel('Sepal length [standardized]')
plt.ylabel('Petal length [standardized]')
plt.legend(loc='upper left')
plt.show()

# Per-epoch cost values recorded by the model during fitting.
plt.plot(range(1, len(ada.cost_) + 1), ada.cost_, marker='o')
plt.xlabel('Epochs')
plt.ylabel('Average cost')
plt.show()