Example #1
from matplotlib.pyplot import figure, show
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import StratifiedKFold
from toolbox_02450 import rocplot, confmatplot

# Keep only the selected attribute (X, y, attributeNames, classNames and
# attribute_included are defined earlier in the script)
X = X[:, attribute_included].reshape(-1, 1)
attributeNames = attributeNames[attribute_included]
N, M = X.shape
C = len(classNames)

# K-fold cross-validation (stratified, so class proportions are preserved)
K = 2
CV = StratifiedKFold(K, shuffle=True)

k = 0
for train_index, test_index in CV.split(X, y):
    # Extract training and test set for current CV fold
    X_train, y_train = X[train_index, :], y[train_index]
    X_test, y_test = X[test_index, :], y[test_index]

    # Fit a logistic regression classifier on the training fold
    logit_classifier = LogisticRegression()
    logit_classifier.fit(X_train, y_train)

    # Class predictions and positive-class probabilities for the test fold
    y_test_est = logit_classifier.predict(X_test)
    p = logit_classifier.predict_proba(X_test)[:, 1]

    # ROC curve and confusion matrix for this fold
    figure(k)
    rocplot(p, y_test)

    figure(k + 1)
    confmatplot(y_test, y_test_est)

    k += 2

show()
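This snippet assumes X, y and the other names come from an earlier part of the script, and that rocplot and confmatplot come from the course's toolbox_02450 module. A minimal, self-contained sketch of the same stratified fold loop on synthetic data, with sklearn's own metrics standing in for the toolbox plotting helpers:

# Minimal sketch of the same stratified CV loop on synthetic data,
# using sklearn metrics instead of the toolbox_02450 plot helpers.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix, roc_auc_score
from sklearn.model_selection import StratifiedKFold

X, y = make_classification(n_samples=200, n_features=1, n_informative=1,
                           n_redundant=0, n_clusters_per_class=1,
                           random_state=0)

K = 2
CV = StratifiedKFold(K, shuffle=True, random_state=0)

for fold, (train_index, test_index) in enumerate(CV.split(X, y)):
    X_train, y_train = X[train_index], y[train_index]
    X_test, y_test = X[test_index], y[test_index]

    clf = LogisticRegression().fit(X_train, y_train)
    p = clf.predict_proba(X_test)[:, 1]

    # AUC summarizes the ROC curve; the confusion matrix replaces confmatplot
    print('Fold {0}: AUC = {1:.3f}'.format(fold, roc_auc_score(y_test, p)))
    print(confusion_matrix(y_test, clf.predict(X_test)))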
Example #2
import numpy as np
from matplotlib.pyplot import figure, show
from sklearn import cross_validation  # legacy API, removed in scikit-learn 0.20
from sklearn.linear_model import LogisticRegression
from toolbox_02450 import rocplot, confmatplot

# Same setup as Example #1, but written against the legacy
# sklearn.cross_validation API (scikit-learn < 0.18), where StratifiedKFold
# takes the label vector directly. Here y is an np.matrix, hence the .A attribute.
X = X[:, attribute_included]
attributeNames = attributeNames[attribute_included]
N, M = X.shape
C = len(classNames)

# K-fold cross-validation
K = 2
CV = cross_validation.StratifiedKFold(y.A.ravel().tolist(), K)

k = 0
for train_index, test_index in CV:
    # Extract training and test set for current CV fold
    X_train, y_train = X[train_index, :], y[train_index, :]
    X_test, y_test = X[test_index, :], y[test_index, :]

    # Fit the classifier; .A.ravel() flattens the label matrix to the
    # 1-D array that fit() expects
    logit_classifier = LogisticRegression()
    logit_classifier.fit(X_train, y_train.A.ravel())

    # Column vectors of class predictions and positive-class probabilities
    y_test_est = np.mat(logit_classifier.predict(X_test)).T
    p = np.mat(logit_classifier.predict_proba(X_test)[:, 1]).T

    figure(k)
    rocplot(p, y_test)

    figure(k + 1)
    confmatplot(y_test, y_test_est)

    k += 2

show()
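The sklearn.cross_validation module used above was deprecated in scikit-learn 0.18 and removed in 0.20. A small sketch of the equivalent construction with the current model_selection API, with toy data standing in for the example's X and y; the labels now go to split() rather than the constructor:

# Migration sketch: legacy cross_validation.StratifiedKFold(labels, K)
# becomes model_selection.StratifiedKFold(n_splits=K).split(X, y)
import numpy as np
from sklearn.model_selection import StratifiedKFold

# Toy data standing in for the X, y of the example above
X = np.arange(20, dtype=float).reshape(10, 2)
y = np.array([0, 1] * 5)

K = 2
CV = StratifiedKFold(n_splits=K)

# split() needs both the data matrix and the labels
for train_index, test_index in CV.split(X, y):
    print(train_index, test_index)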
Example #3
    # Tail of the outer fold in a two-level cross-validation loop; the
    # inner loop has already filled err_inner_loop and clf_best_para,
    # and lm is sklearn.linear_model
    best_index = np.argmin(err_inner_loop)
    logit_classifier = lm.LogisticRegression(C=clf_best_para[best_index])
    logit_classifier.fit(X_train, y_train)
    y_est = logit_classifier.predict(X_test)
    y_test_outer.append(y_test)
    x_test_outer.append(X_test)
    clf_list.append(logit_classifier)
    best_est.append(y_est)
    # Misclassification rate on the outer test fold
    mis_classified = np.sum(y_est != y_test)
    relative_error = mis_classified / len(y_test)
    gen_err[k] = relative_error
    k += 1

# Show confusion matrix for the best outer fold of the K-fold cross-validation
# (confmatplot expects the true labels first, then the estimates)
best_index = gen_err.argmin()
figure()
confmatplot(y_test_outer[best_index], best_est[best_index])
print("The mean generalization error is {0}".format(np.mean(gen_err)))

# # Show confusion matrix for model testing using leave-one-out cross-validation
# figure(1)
# Y_est2 = clf_list[best_index].predict(X_test)
# confmatplot(Y_test, Y_est2)
# show()

# Decision boundaries for the multinomial regression model
def nevallog(xval):
    return np.argmax(clf_list[best_index].predict_proba(xval), 1)
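The fragment consumes err_inner_loop and clf_best_para without showing how they are built. A hypothetical inner loop that would produce them, assuming they are parallel arrays of candidate C values and their inner CV errors (toy data stands in for the outer training fold):

# Hypothetical inner loop matching the fragment above: it produces the
# err_inner_loop and clf_best_para arrays that the outer fold consumes.
import numpy as np
import sklearn.linear_model as lm
from sklearn.datasets import make_classification
from sklearn.model_selection import StratifiedKFold

# Toy training fold standing in for X_train, y_train of the outer loop
X_train, y_train = make_classification(n_samples=100, random_state=0)

clf_best_para = np.logspace(-2, 2, 10)        # candidate values of C
err_inner_loop = np.empty(len(clf_best_para))

CV_inner = StratifiedKFold(n_splits=5, shuffle=True, random_state=0)
for i, C in enumerate(clf_best_para):
    fold_errs = []
    for tr, te in CV_inner.split(X_train, y_train):
        clf = lm.LogisticRegression(C=C).fit(X_train[tr], y_train[tr])
        fold_errs.append(np.mean(clf.predict(X_train[te]) != y_train[te]))
    err_inner_loop[i] = np.mean(fold_errs)    # mean inner CV error per C

print('Best C:', clf_best_para[np.argmin(err_inner_loop)])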
Example #4
    # Tail of the outer fold in a two-level cross-validation loop;
    # best_param_knn holds the n_neighbors chosen by the inner loop
    best_knn = KNeighborsClassifier(n_neighbors=best_param_knn[k])
    best_knn = best_knn.fit(X_train, y_train)
    y_est = best_knn.predict(X_test)
    X_test_outer.append(X_test)
    best_knn_list.append(best_knn)
    # Misclassification rate on the outer test fold
    mis_classified = np.sum(y_est != y_test)
    relative_error = mis_classified / len(y_test)
    y_ESTKNN.append(y_est)

    y_test_outer.append(y_test)
    err_test_outer.append(relative_error)
    k += 1

print("The generalization error is {0}".format(np.mean(err_test_outer)))
best_index = np.argmin(err_test_outer)

# Confusion matrix for the best-performing outer fold
confmatplot(y_test_outer[best_index], y_ESTKNN[best_index])
show()


# Decision boundary helper: use the classifier from the best outer fold,
# not the one left over from the final iteration
def neval(xval):
    return np.argmax(best_knn_list[best_index].predict_proba(xval), 1)


# Plot decision boundaries only when the data has two dimensions
# (e.g. projected onto two principal components)
if k_pca == 2:
    figure()
    dbplotf(X_test_outer[best_index], y_test_outer[best_index], neval, 'auto')
    show()
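dbplotf is a toolbox_02450 helper. As a rough illustration of what it does, here is a minimal matplotlib sketch that evaluates a fitted 2-D classifier on a grid and colours the predicted regions, with synthetic blobs standing in for the projected data:

# Minimal matplotlib sketch of what dbplotf does: evaluate the classifier
# on a grid over the 2-D feature space and colour the predicted regions.
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.neighbors import KNeighborsClassifier

X2, y2 = make_blobs(n_samples=150, centers=3, random_state=0)
clf = KNeighborsClassifier(n_neighbors=5).fit(X2, y2)

# Regular grid spanning the data, padded by one unit on each side
xx, yy = np.meshgrid(np.linspace(X2[:, 0].min() - 1, X2[:, 0].max() + 1, 200),
                     np.linspace(X2[:, 1].min() - 1, X2[:, 1].max() + 1, 200))
grid = np.c_[xx.ravel(), yy.ravel()]
zz = np.argmax(clf.predict_proba(grid), 1).reshape(xx.shape)

plt.contourf(xx, yy, zz, alpha=0.3)                    # predicted class regions
plt.scatter(X2[:, 0], X2[:, 1], c=y2, edgecolors='k')  # the data on top
plt.show()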
Example #5
# Outer fold with the lowest misclassification error
best_index_total = errors_outer.argmin()

gen_error = np.mean(errors_outer).round(3)
print("The generalization error is {0}% misclassification".format(gen_error * 100))

# Error of the best model from each inner loop, labelled by hidden-layer size
figure(figsize=(6, 7))
bar(range(0, K), errors_outer)
title('Errors for best performing models from the inner loop')
xticks(np.arange(K), best_hidden_neurons)
xlabel('Hidden Neurons')
ylabel('Misclassification rate')

# Confusion matrix for the best fold of the ANN selection
figure()
confmatplot(y_test_outer[best_index_total], y_est_outer[best_index_total])

# Decision boundary helper for the selected classifier
def neval(xval):
    return np.argmax(best_clf_outer[best_index_total].predict_proba(xval), 1)

# Plot decision boundaries only when the data is two-dimensional
if k_pca == 2:
    figure()
    dbplotf(X_test_outer[best_index_total], y_test_outer[best_index_total], neval, 'auto')
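Examples #3 to #5 are all fragments of the same pattern. A compact, self-contained skeleton of two-level cross-validation, with KNN as a stand-in model, showing how the inner loop selects a hyperparameter and the outer loop estimates the generalization error of that selection procedure:

# Skeleton of the two-level cross-validation that Examples #3-#5 are
# fragments of: the inner loop picks a hyperparameter, the outer loop
# measures how well that selection procedure generalizes.
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import StratifiedKFold
from sklearn.neighbors import KNeighborsClassifier

X, y = make_classification(n_samples=200, random_state=0)
params = [1, 3, 5, 7, 9]                      # candidate n_neighbors

K_outer, K_inner = 3, 5
errors_outer = np.empty(K_outer)

outer = StratifiedKFold(n_splits=K_outer, shuffle=True, random_state=0)
for k, (tr, te) in enumerate(outer.split(X, y)):
    # Inner loop: accumulate the inner CV error of each candidate
    inner_err = np.zeros(len(params))
    inner = StratifiedKFold(n_splits=K_inner, shuffle=True, random_state=0)
    for i, p in enumerate(params):
        for itr, ite in inner.split(X[tr], y[tr]):
            clf = KNeighborsClassifier(n_neighbors=p).fit(X[tr][itr], y[tr][itr])
            inner_err[i] += np.mean(clf.predict(X[tr][ite]) != y[tr][ite])
    best = params[np.argmin(inner_err)]

    # Outer fold: refit with the selected parameter and measure test error
    clf = KNeighborsClassifier(n_neighbors=best).fit(X[tr], y[tr])
    errors_outer[k] = np.mean(clf.predict(X[te]) != y[te])

print("Generalization error estimate: {0:.3f}".format(errors_outer.mean()))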