def ranforest(n_estimators, min_samples_split):
    
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.metrics import accuracy_score
    from time import time
    
    clf = RandomForestClassifier(n_estimators = n_estimators, 
                                 min_samples_split = min_samples_split,
                                 bootstrap = True)

    # fit once, timed (the original fit the classifier twice)
    t_fit = time()
    clf.fit(features_train, labels_train)
    print "training time:", round(time()-t_fit, 3), "s"
    
    t_pred = time()
    pred = clf.predict(features_test)
    print "predict time:", round(time()-t_pred, 3), "s"
    
    print accuracy_score(pred, labels_test)

    
    try:
        prettyPicture(clf, features_test, labels_test)
    except NameError:
        pass
def supportvector(C, gamma = 'default'):
    
    from sklearn.svm import SVC
    from sklearn.metrics import accuracy_score
    from time import time
    
    if gamma == 'default':
        clf = SVC(kernel="rbf", C = C)
    else:
        clf = SVC(kernel="rbf", C = C, gamma = gamma)
    

    # fit once, timed (the original fit the classifier twice)
    t_fit = time()
    clf.fit(features_train, labels_train)
    print "training time:", round(time()-t_fit, 3), "s"
    
    t_pred = time()
    pred = clf.predict(features_test)
    print "predict time:", round(time()-t_pred, 3), "s"
    
    print accuracy_score(pred, labels_test)
    
    try:
        prettyPicture(clf, features_test, labels_test)
    except NameError:
        pass
Example #3
def process(method, name, param_grid, **argument):
    t0 = time()
    clf = grid_search.GridSearchCV(method(**argument), param_grid)
    clf.fit(features_train, labels_train)
    pred = clf.predict(features_test)
    print(clf.best_estimator_)
    print("accuracy", metrics.accuracy_score(labels_test, pred))
    print("done in %0.3fs" % (time() - t0))
    prettyPicture(clf, features_test, labels_test, name)
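
# A minimal usage sketch for process() above. It assumes the same module-level
# imports the function itself relies on (time, the old sklearn.grid_search
# module, metrics, prettyPicture) plus the terrain-data globals; the
# param_grid values are illustrative, not tuned.
from time import time
from sklearn import grid_search, metrics
from sklearn.svm import SVC

param_grid = {'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.01, 0.1]}
process(SVC, "svm_grid", param_grid, kernel="rbf")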
def runClassifier(clf, clfName):
    from time import time
    t0 = time()
    clf.fit(features_train, labels_train)
    print clfName, "training time:", round(time()-t0, 3), "s"
    ### measure the accuracy 
    accuracy = clf.score(features_test, labels_test)
    print "accuracy:",accuracy*100,"%"
    ### visualization code (prettyPicture) to show you the decision boundary

    try:
        prettyPicture(clf, features_test, labels_test,clfName)
    except NameError:
        print "no pic"
Example #5
def k_nearest_neighbours():
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score
    from time import time
    cls = KNeighborsClassifier(n_neighbors=8)

    t0 = time()
    cls.fit(features_train, labels_train)
    print "training time:", round(time()-t0, 3), "s"

    t0 = time()
    pred = cls.predict(features_test)
    print "prediction time:", round(time()-t0, 3), "s"
    print "accuracy: ", accuracy_score(labels_test, pred)
    try:
        prettyPicture(cls, features_test, labels_test)
    except NameError:
        pass
Example #6
def KNearestNeigh(k):
  from sklearn.neighbors import KNeighborsClassifier
  
  clf = KNeighborsClassifier(n_neighbors = k)

  start_time = time()
  clf.fit(features_train, labels_train)
  elapsed = time()-start_time
  text = "Training time (kNearestNeigh:{0}): {1}s".format(k, round(elapsed, 3))
  writeToFile("ChooseYourOwn_output.txt", text, "a")

  acc = clf.score(features_test, labels_test)
  text = "Accuracy (kNearestNeigh:{0}): {1}".format(k, round(acc, 3))
  writeToFile("ChooseYourOwn_output.txt", text, "a")

  try:
    prettyPicture(clf, features_test, labels_test)
  except NameError:
    pass
Example #7
def RandomForest(k):
  from sklearn.ensemble import RandomForestClassifier

  clf = RandomForestClassifier(n_estimators = k)

  start_time = time()
  clf.fit(features_train, labels_train)
  elapsed = time()-start_time
  text = "Training time (RandomForest:{0}): {1}s".format(k, round(elapsed, 3))
  writeToFile("ChooseYourOwn_output.txt", text, "a")

  acc = clf.score(features_test, labels_test)
  text = "Accuracy (RandomForest:{0}): {1}".format(k, round(acc, 3))
  writeToFile("ChooseYourOwn_output.txt", text, "a")

  try:
    prettyPicture(clf, features_test, labels_test)
  except NameError:
    pass
def adaboost(n_estimators, learning_rate):
    
    from sklearn.ensemble import AdaBoostClassifier
    from sklearn.metrics import accuracy_score
    from time import time
    
    clf = AdaBoostClassifier(n_estimators = n_estimators, 
                             learning_rate = learning_rate)

    # fit once, timed (the original fit the classifier twice)
    t_fit = time()
    clf.fit(features_train, labels_train)
    print "training time:", round(time()-t_fit, 3), "s"
    
    t_pred = time()
    pred = clf.predict(features_test)
    print "predict time:", round(time()-t_pred, 3), "s"
    
    print accuracy_score(pred, labels_test)

    try:
        prettyPicture(clf, features_test, labels_test)
    except NameError:
        pass
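
# Hypothetical calls to the three helpers above, assuming the terrain-data
# globals they rely on are in scope; the parameter values are illustrative.
ranforest(n_estimators=100, min_samples_split=5)
supportvector(C=1000.0, gamma=10)
adaboost(n_estimators=50, learning_rate=1.0)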
Example #9
def testClassifier(clf):

    print "number of features in train=",  len(features_train[0])

    t0 = time()
    clf.fit(features_train, labels_train)
    tt = time()

    prettyPicture(clf, features_test, labels_test)
    #output_image("test.png", "png", open("test.png", "rb").read())
    display(Image("test.png"))

    print "training time:", round(tt-t0, 3), "s"

    t1 = time()
    pred = clf.predict(features_test)
    print "predict time:", round(time()-t1, 3), "s"

    #print "answer10=", pred[10]
    #print "answer26=", pred[26]
    #print "answer50=", pred[50]

    import numpy as np
    #print "# of 1's(sum)=", np.sum(pred)
    print "# of 1's(count_nonzero)=", np.count_nonzero(pred)

    import collections
    print "# of 1's(Counter)=", collections.Counter(pred)
    # of 1's(Counter)= Counter({0: 881, 1: 877})

    print "len(pred)=", len(pred), " len(labes_test)=", len(labels_test)

    from sklearn.metrics import accuracy_score
    print "accuracy_score:", (accuracy_score(labels_test, pred))

    return
Example #10
def drawDecisionBoundary(clf):
    print 'Classifier: ', clf.__class__.__name__
    t0 = time()
    clf.fit(features_train, labels_train)
    print 'time training: ', time()-t0

    t1 = time()
    pred = clf.predict(features_test)
    print 'time predicting: ', time()-t1

    accuracy = accuracy_score(labels_test, pred)
    print 'accuracy: ', accuracy

    try:
        plot = prettyPicture(clf, features_test, labels_test)
        plot.show()
    except NameError:
        print 'something went wrong'
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]


def classify(clf, features_train, labels_train, **kwargs):
    clf = clf(**kwargs)
    clf.fit(features_train, labels_train)
    return clf

def classifyAdaboost(features_train, labels_train, n_estimators=100):
    return classify(AdaBoostClassifier, features_train, labels_train, n_estimators=n_estimators)

def classifyKNN(features_train, labels_train, n_neighbors=8):
    return classify(KNeighborsClassifier, features_train, labels_train, n_neighbors=n_neighbors)

def classifyRF(features_train, labels_train, n_estimators=100):
    return classify(RandomForestClassifier, features_train, labels_train, n_estimators=n_estimators)

if __name__ == "__main__":
    clf_dict = {"knn": classifyKNN,
                "adaboost": classifyAdaboost,
                "randomforest": classifyRF}
    for name, clf in clf_dict.items():
        print(name, ":")
        clf_fitted = clf(features_train, labels_train)
        pred = clf_fitted.predict(features_test)
        print("Accuracy:", accuracy_score(labels_test, pred))
        prettyPicture(clf_fitted, features_test, labels_test)
Example #12
x_train, y_train, x_test, y_test = makeTerrainData()

df = pandas.DataFrame(data=x_test, columns=['f_1', 'f_2'])
df['label'] = pandas.Series(y_test)

dat_pos = df[df.label == 1]
dat_neg = df[df.label == 0]

plt.scatter(dat_pos['f_1'], dat_pos['f_2'], c='blue')
plt.scatter(dat_neg['f_1'], dat_neg['f_2'], c='red')
#plt.show()

clf = svm.SVC(kernel='rbf', C=0.01)
clf.fit(x_train, y_train)

prettyPicture(clf, x_test, y_test)

#w = clf.coef_[0]
#a = -w[0]/w[1]
#m = clf.intercept_[0] / w[1]
#x_ = np.linspace(0.0,1.0)
#y_ = a*x_ - m

#plt.plot(x_,y_)
plt.show()

#feature_1 = [x[0] for x in x_train]
#feature_2 = [x[1] for x in x_train]

#pos = [i for i,x in enumerate(y_train) if x == 1]
#neg = [i for i,x in enumerate(y_train) if x != 1]
Example #13
title = "Learning Curves (Naive Bayes)"
# Cross validation with 100 iterations to get smoother mean test and train
# score curves, each time with 20% data randomly selected as a validation set.
cv = ShuffleSplit(n_splits=100, test_size=0.2, random_state=0)

estimator = GaussianNB()
plot_learning_curve(estimator,
                    title,
                    nfeatures,
                    nlabels,
                    ylim=(0.7, 1.01),
                    cv=cv,
                    n_jobs=4)

try:
    prettyPicture(clf, features_test, labels_test, feature_1, feature_2,
                  "naive.png")
except NameError:
    pass

# decision tree
from sklearn import tree
clf = tree.DecisionTreeClassifier()
clf = clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
print "decision tree: ", clf.score(features_test, labels_test)
print "decision tree: precision score: ", metrics.precision_score(
    labels_test, pred)
print "decision tree: recall score: ", metrics.recall_score(labels_test, pred)

title = "Learning Curves (Decision tree)"
# Cross validation with 100 iterations to get smoother mean test and train
Example #14

#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(grade_slow, bumpy_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()
################################################################################


### your code here!  name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary

from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score as acc

clf = RandomForestClassifier(n_jobs=-1, criterion="gini", n_estimators=100, min_samples_leaf=5, max_features=1)
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
print "Accuracy", acc(pred, labels_test)


try:
    prettyPicture(clf, features_train, labels_train)
except NameError:
    pass
Example #15
plt.xlabel("bumpiness")
plt.ylabel("grade")
#plt.show()
#################################################################################


### your code here!  name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary


from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

clf = KNeighborsClassifier(n_neighbors=100)
print "Start training"
clf.fit(features_train, labels_train)
print "End training"
print "Beging prediction"
pred = clf.predict(features_test)
print "End prediction"
acc = accuracy_score(labels_test, pred)
print acc

#print "{0} {1} {2}".format(len(clf.predict(features_test)),len(features_test), len(labels_test))

try:
    my_return = prettyPicture(clf, features_test, labels_test)
    print my_return
except NameError:
    print "Oops!"
Example #16
# k-nearest neighbor
from sklearn.neighbors import KNeighborsClassifier
neigh = KNeighborsClassifier(n_neighbors=1)
neigh.fit(features_train, labels_train)
print "KNN Accuracy:", neigh.score(features_test, labels_test)

# Random Forest
from sklearn.ensemble import RandomForestClassifier
rfc = RandomForestClassifier(n_estimators=200)
rfc.fit(features_train, labels_train)
print "Random Forest Accuracy:", rfc.score(features_test, labels_test)

# AdaBoost
from sklearn.ensemble import AdaBoostClassifier
abc = AdaBoostClassifier()
abc.fit(features_train, labels_train)
print "AdaBoost Accuracy:", abc.score(features_test, labels_test)

prettyPicture(neigh, features_test, labels_test, "neigh.png")
prettyPicture(rfc, features_test, labels_test, "rfc.png")
prettyPicture(abc, features_test, labels_test, "abc.png")

# for clf in [neigh, rfc, abc]:
#     try:
#         print "plotting"
#         prettyPicture(clf, features_test, labels_test)
#     except NameError:
#         print "passed"
#         pass
Example #17
def show_plot(clf, features_test, labels_test):
    try:
        prettyPicture(clf, features_test, labels_test)
    except NameError:
        pass
Example #18
### visualization code (prettyPicture) to show you the decision boundary

for algo in ["adaboost", "random_forest", "KNN"]:
    clf = 0
    if algo == "adaboost":
        from sklearn.ensemble import AdaBoostClassifier
        clf = AdaBoostClassifier().fit(features_train, labels_train)
    if algo == "random_forest":
        from sklearn.ensemble import RandomForestClassifier
        clf = RandomForestClassifier().fit(features_train, labels_train)
    if algo == "KNN":
        from sklearn.neighbors import KNeighborsClassifier
        clf = KNeighborsClassifier(n_neighbors=8).fit(features_train,
                                                      labels_train)
    try:
        prettyPicture(clf, features_test, labels_test, name=algo)
    except NameError:
        pass
    from sklearn.metrics import accuracy_score
    print("%s accuracy: %f" %
          (algo, accuracy_score(labels_test, clf.predict(features_test))))

# for n_estimators in [50,100,150,200,250,300,350,400,460,500]:
#     clf = 0
#     from sklearn.ensemble import AdaBoostClassifier
#     clf = AdaBoostClassifier(n_estimators=n_estimators).fit(features_train, labels_train)
#     print("%d adaBoost accuracy: %f"%(n_estimators,accuracy_score(labels_test,clf.predict(features_test))))

maxAccuracy = 0
for k in range(2, 200):
    clf = 0
                'n_estimators': param_estimators,
                'accuracy': acc_ada
            }


#import pandas as pd
#df = pd.DataFrame(many_ada_versions())
#df_pivot = df.pivot(index='n_estimators', columns='learning_rate', values='accuracy')
#df_pivot.plot()
#print(df_pivot.max())

#the best score: ADA : n=20, rate=2
ada = getAdaBoost(features_train,
                  labels_train,
                  n_estimators=20,
                  learning_rate=2)
y_pred_ada = ada.predict(features_test)
acc_ada = accuracy_score(labels_test, y_pred_ada)
acc = {
    #        "acc_kmeans": round(acc_kmeans,3),
    #        "acc_forest": round(acc_forest,3),
    "acc_ada": round(acc_ada, 3),
}
print(acc)

try:
    prettyPicture(ada, features_test, labels_test)
    plt.show()
except NameError:
    pass
Example #20
#################################################################################

### your code here!  name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary

n_neighbors = 15
print "Loading %iNN library" % n_neighbors
from sklearn.neighbors import KNeighborsClassifier

clf = KNeighborsClassifier(n_neighbors)
print "Training algorithm"
clf.fit(features_train, labels_train)
print "Predicting results"
pred = clf.predict(features_test)

print "Computing algorithm accuracy"
from sklearn.metrics import accuracy_score

acc = accuracy_score(pred, labels_test)
print "Accuracy: %.4f" % acc
# Accuracy 93.6% for 3NN
# Accuracy 94.0% for 4NN, but even k values shouldn't be used (risk of ties)! Why is it better?
# Accuracy 92.0% for 5NN
# Accuracy 93.6% for 7NN
outputfile = "test_%iNN.png" % n_neighbors
print "Saving output plot as %s" % outputfile
prettyPicture(clf, features_test, labels_test, outputfile)
output_image(outputfile, "png", open(outputfile, "rb").read())
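
# To probe the even-vs-odd k question in the comments above, a sweep is more
# informative than one-off runs. A minimal sketch, assuming the same
# train/test globals; the k range is arbitrary.
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

for k in range(1, 11):
    clf_k = KNeighborsClassifier(n_neighbors=k).fit(features_train, labels_train)
    print "k=%i accuracy=%.4f" % (k, accuracy_score(labels_test, clf_k.predict(features_test)))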
Example #21
# head of this fragment was truncated; reconstructed from the identical
# parameter list used elsewhere in this document
from sklearn.ensemble import AdaBoostClassifier

clfAdaBoost = AdaBoostClassifier(base_estimator=None,
                                 n_estimators=50,
                                 learning_rate=1.0,
                                 algorithm='SAMME.R',
                                 random_state=None)
clfAdaBoost.fit(features_train, labels_train)
predAdaBoost = clfAdaBoost.predict(features_test)

from sklearn.svm import SVC

clfSVM = SVC(C=1000.0, kernel='rbf')
clfSVM.fit(features_train, labels_train)
predSVM_rbf = clfSVM.predict(features_test)

clfSVM = SVC(C=1000.0, kernel='poly', degree=1)
clfSVM.fit(features_train, labels_train)
predSVM_polyFirst = clfSVM.predict(features_test)

try:
    prettyPicture(clfKNN, features_test, labels_test)
    prettyPicture(clfRandomForest, features_test, labels_test)
    prettyPicture(clfAdaBoost, features_test, labels_test)

except NameError:
    pass

from sklearn.metrics import accuracy_score
print "KNN-Accuracy: ", accuracy_score(predKnn, labels_test)
print "Random Forest-Accuracy: ", accuracy_score(predRandomForest, labels_test)
print "AdaBoost-Accuracy: ", accuracy_score(predAdaBoost, labels_test)
print "SVM-RBF Kernel: ", accuracy_score(predSVM_rbf, labels_test)
print "SVM-Poly Kernel: ", accuracy_score(predSVM_polyFirst, labels_test)
Example #22
# assumed setup for this truncated fragment: imports, split values, results dict
from sklearn import tree
from sklearn.metrics import accuracy_score
from pprint import pprint as pp

min_samples = [2, 50]
acc_samples = {}

for sample in min_samples:

    clf = tree.DecisionTreeClassifier(min_samples_split=sample)
    clf = clf.fit(features_train, labels_train)

    # clf = classify(features_train, labels_train)

    pred = clf.predict(features_test)

    accuracy = accuracy_score(pred, labels_test)

    acc_samples[f'acc_min_samples_split_{sample}'] = accuracy

    print(f'Accuracy for min_samples_split = {sample}: {accuracy}')

    prettyPicture(clf, features_test, labels_test, pic_name=f'test_{sample}')
    output_image(f"test_{sample}.png", "png",
                 open(f"test_{sample}.png", "rb").read())

    print('\n')


def submit_accuracies():
    return acc_samples


if __name__ == "__main__":

    pp(submit_accuracies())
Example #23
### your code here!  name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary

clf = AdaBoostClassifier(DecisionTreeClassifier(max_depth=10), n_estimators=10)
clf = clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
print "AdaBoost+tree_accuracy"
print accuracy_score(labels_test, pred)

clf = AdaBoostClassifier(base_estimator=SVC(random_state=1),
                         algorithm="SAMME",
                         n_estimators=1)
clf = clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
print "svc_accuracy"
print accuracy_score(labels_test, pred)

clf_randomforest = RandomForestClassifier(n_estimators=100)
clf_randomforest.fit(features_train, labels_train)
prettyPicture(clf_randomforest, features_test, labels_test)
#score.append(["randomforest", clf_randomforest.score(features_test, labels_test)])
pred = clf_randomforest.predict(features_test)
print "randomforest_accuracy"
print accuracy_score(labels_test, pred)

try:
    prettyPicture(clf, features_test, labels_test)
except NameError:
    pass
Example #24
features_train, labels_train, features_test, labels_test = makeTerrainData()

########################## SVM #################################
# we handle the import statement and SVC creation for you here
from sklearn.svm import SVC
clf = SVC(kernel="linear")

# now your job is to fit the classifier
# using the training features/labels, and to
# make a set of predictions on the test data

clf.fit(features_train, labels_train)

# store your predictions in a list named pred

pred = clf.predict(features_test)

prettyPicture(clf, features_test, labels_test, f_name="svm_lin.png")
Image.open('svm_lin.png').show()

acc = accuracy_score(pred, labels_test)
print "SVM accuracy: %r" % acc

clf = SVC(kernel="rbf")
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
prettyPicture(clf, features_test, labels_test, f_name="svm_rbf.png")

def submitAccuracy():
    return acc
Example #25
features_train, labels_train, features_test, labels_test = makeTerrainData()

# the classify() function in classifyDT is where the magic happens
clf = classify(features_train, labels_train)

# predict
pred = clf.predict(features_test)

# compute accuracy
acc = clf.score(features_test, labels_test)

print "accuracy for min_sample_split=2:", acc

# build and save the scatter plot to the file
prettyPicture(clf, features_test, labels_test, "test_min_sample_split2.png")
output_image("test_min_sample_split2.png", "png", open("test_min_sample_split2.png", "rb").read())

# get classifier with higher min_sample_split
clf = classify(features_train, labels_train, 50)

# predict
pred = clf.predict(features_test)

# compute accuracy
acc = clf.score(features_test, labels_test)

print "accuracy for min_sample_split=50:", acc

# build and save the scatter plot to the file
prettyPicture(clf, features_test, labels_test)
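
# The classify() function from classifyDT is used above but not shown. A
# minimal sketch consistent with how it is called here (optional
# min_samples_split defaulting to 2); the exact body is an assumption.
from sklearn import tree

def classify(features_train, labels_train, min_samples_split=2):
    clf = tree.DecisionTreeClassifier(min_samples_split=min_samples_split)
    clf.fit(features_train, labels_train)
    return clf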
print "features_train: {}".format(len(features_train))
print "features_test: {}".format(len(features_test))
print "labels_train: {}".format(len(labels_train))
print "labels_test: {}".format(len(labels_test))

from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier, GradientBoostingClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score

try:
    clf = AdaBoostClassifier(n_estimators=100)
    clf = clf.fit(features_train, labels_train)
    accuracy = accuracy_score(labels_test, clf.predict(features_test))
    print "AdaBoostClassifier accuracy: {}".format(accuracy)
    prettyPicture(clf, features_test, labels_test, "adaboost_test.png")
except NameError:
    pass

try:
    clf = RandomForestClassifier()
    clf = clf.fit(features_train, labels_train)
    accuracy = accuracy_score(labels_test, clf.predict(features_test))
    print "RandomForestClassifier accuracy: {}".format(accuracy)
    prettyPicture(clf, features_test, labels_test, "randomforest_test.png")
except NameError:
    pass

try:
    clf = GradientBoostingClassifier()
    clf = clf.fit(features_train, labels_train)
    accuracy = accuracy_score(labels_test, clf.predict(features_test))
    print "GradientBoostingClassifier accuracy: {}".format(accuracy)
    prettyPicture(clf, features_test, labels_test, "gradientboosting_test.png")
except NameError:
    pass
Example #27
#!/usr/bin/python
""" lecture and example code for decision tree unit """

import sys
sys.path.insert(1, '../naive_bayes')
import class_vis

from prep_terrain_data import makeTerrainData

# import matplotlib.pyplot as plt
# import numpy as np
# import pylab as pl
from classifyDT import classify

features_train, labels_train, features_test, labels_test = makeTerrainData()

### the classify() function in classifyDT is where the magic
### happens--fill in this function in the file 'classifyDT.py'!
clf = classify(features_train, labels_train, features_test, labels_test)

#### grader code, do not modify below this line

class_vis.prettyPicture(clf, features_test, labels_test, "test.png")
#output_image("test.png", "png", open("test.png", "rb").read())

# importing Image class from PIL package
from PIL import Image
# creating an object
im = Image.open("test.png")
im.show()
#!/usr/bin/python
""" lecture and example code for decision tree unit """

import sys
sys.path.append('../Lesson2_Naive_Bayes')
from class_vis import prettyPicture, output_image
from prep_terrain_data import makeTerrainData

import matplotlib.pyplot as plt
import numpy as np
import pylab as pl
from classifyDT import classify

features_train, labels_train, features_test, labels_test = makeTerrainData()

### the classify() function in classifyDT is where the magic
### happens--fill in this function in the file 'classifyDT.py'!
from sklearn import tree
from sklearn.metrics import accuracy_score
clf = tree.DecisionTreeClassifier(min_samples_split=40, criterion='gini')
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
acc = accuracy_score(pred, labels_test)
'''
#### grader code, do not modify below this line
prettyPicture(clf, features_test, labels_test)
output_image("test.png", "png", open("test.png", "rb").read())
'''
Example #29
from class_vis import prettyPicture, show_img
from clear import clear

features_train, labels_train, features_test, labels_test = makeTerrainData()

#########################################################
### your code goes here ###

from sklearn.svm import SVC
from sklearn import metrics

clear()

# Larger C pushes the classifier to get more training points correct
# Larger gamma makes the decision boundary hug individual points more closely

classifier = SVC(kernel="linear", C=1)
classifier.fit(features_train, labels_train)

prediction = classifier.predict(features_test)

accuracy = metrics.accuracy_score(labels_test, prediction)

print(accuracy)

pictureName = "svm.png"

prettyPicture(classifier, features_test, labels_test, pictureName)

show_img(pictureName)
#########################################################
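
# The comments above describe C only qualitatively; a small sweep makes the
# effect concrete. A sketch under the same assumptions as the code above; the
# C values are illustrative.
for c_value in [0.1, 1.0, 10.0, 100.0]:
    clf_c = SVC(kernel="linear", C=c_value)
    clf_c.fit(features_train, labels_train)
    print(c_value, metrics.accuracy_score(labels_test, clf_c.predict(features_test)))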
Example #30
import sys
sys.path.append("../choose_your_own/")
from class_vis import prettyPicture
import numpy as np
x = np.array([[-1, -1], [-2, -1], [1, 1], [2, 1]])
print x
y = np.array([1, 1, 2, 2])
print y
from sklearn.svm import SVC
clf = SVC()
fitted = clf.fit(x, y)  # fit() returns the estimator itself
print fitted
test_data = clf.predict([[-0.8, -1]])
print test_data
prettyPicture(clf, x, y)
Example #31
t0 = time()
adaBoostClf = AdaBoostClassifier(n_estimators=30,learning_rate=0.4)
adaBoostClf.fit(features_train, labels_train)
print "default adaBoost training time:", round(time()-t0, 3), "s"

#t0 = time()
#rfClf = RandomForestClassifier()
#rfClf.fit(features_train, labels_train)
#print "default randomForest training time:", round(time()-t0, 3), "s"

#knnPred = knnClf.predict(features_test)
#knnacc = accuracy_score(knnPred, labels_test)

adaBoostPred = adaBoostClf.predict(features_test)
adaBoostacc = accuracy_score(adaBoostPred, labels_test)

#rfPred = rfClf.predict(features_test)
#rfacc = accuracy_score(rfPred, labels_test)

# print "default knn accuracy:", knnacc
print "default adaBoost accuracy:", adaBoostacc
# print "default rf accuracy:", rfacc


try:
    prettyPicture(adaBoostClf, features_test, labels_test)
except NameError:
	print "unable to produce boundary"
    pass
import sys
sys.path.append("../JumpToMachineLearning/Helpers/")

from prep_data import makeTerrainData
from class_vis import prettyPicture, Show_Image
from ClassifyHelper import Accuracy

picture_name = "SVMclf.png"

features_train, labels_train, features_test, labels_test = makeTerrainData()

########################## SVM #################################

from sklearn.svm import SVC
clf = SVC(C=1000.0, kernel="rbf", gamma=10)
#  large C means more training points classified correctly,
#  kernel: see the sklearn documentation
#  gamma defines how far the influence of a single training example reaches; high gamma means a very curvy decision boundary

clf.fit(features_train, labels_train)
pred = clf.predict(features_test)

prettyPicture(clf, features_test, labels_test, picture_name)

Show_Image(picture_name)

accuracy = Accuracy(clf, features_test, labels_test)

print("Accuracy score of svm for terrain data is : {}".format(accuracy))
Example #33
#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(grade_slow, bumpy_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
# plt.show()
################################################################################

### your code here!  name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary

#########################################################
### your code goes here ###

# from sklearn.ensemble import RandomForestClassifier
# clf = RandomForestClassifier()
# clf.fit(features_train, labels_train)
# print " acc: ", clf.score(features_test, labels_test)

from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(n_neighbors=4, algorithm='auto', weights='distance')
clf.fit(features_train, labels_train)
print "acc: ", clf.score(features_test, labels_test)

try:
    prettyPicture(clf, features_test, labels_test)
except NameError:
    pass
Example #34
features_train, labels_train, features_test, labels_test = makeTerrainData()

### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]

#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
plt.scatter(bumpy_fast, grade_fast, color = "b", label="fast")
plt.scatter(grade_slow, bumpy_slow, color = "r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()

clf = AdaBoostClassifier()
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
acc = accuracy_score(pred, labels_test)
print "Decision Tree accuracy: %r" % acc

try:
    prettyPicture(clf, features_test, labels_test, f_name="ada_boost.png")
except NameError:
    pass
Example #35
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()
################################################################################


### your code here!  name your classifier object clf if you want the 
### visualization code (prettyPicture) to show you the decision boundary

## K Nearest Neighbors 
knnclf = KNeighborsClassifier()
knnclf.fit(features_train, labels_train)
pred = knnclf.predict(features_test)
knnacc = accuracy_score(labels_test, pred)
print(knnacc)
prettyPicture(knnclf, features_test, labels_test)
# accuracy = 0.92

rfclf = RandomForestClassifier()
rfclf.fit(features_train, labels_train)
pred = rfclf.predict(features_test)
rfacc = accuracy_score(labels_test, pred)
print(rfacc)
prettyPicture(rfclf, features_test, labels_test)
#accuracy = 0.92

abclf = AdaBoostClassifier()
abclf.fit(features_train, labels_train)
pred = abclf.predict(features_test)
abacc = accuracy_score(labels_test, pred)
print(abacc)
Example #36
def classify_adaboost(n):
    clf = AdaBoostClassifier(n_estimators=n)
    clf = clf.fit(features_train, labels_train)
    pred = clf.predict(features_test)
    return accuracy_score(pred, labels_test)

def optimal_adaboost():
    print("optimal AdaBoost")
    optimal_n = 0
    optimal_accuracy = 0
    for i in range(1, 30):
        acc = classify_adaboost(i)
        if acc > optimal_accuracy:
            optimal_accuracy = acc
            optimal_n = i
    print("estimators: " + str(optimal_n))
    print("accuracy:   " + str(optimal_accuracy))
    print("")

    clf = AdaBoostClassifier(n_estimators=optimal_n)
    clf.fit(features_train, labels_train)
    return clf

clf = optimal_KNN()
prettyPicture(clf, features_test, labels_test, 'optimal_knn.png')

clf = optimal_forest()
prettyPicture(clf, features_test, labels_test, 'optimal_forest.png')

clf = optimal_adaboost()
prettyPicture(clf, features_test, labels_test, 'optimal_adaboost.png')
Example #37
features_train, labels_train, features_test, labels_test = makeTerrainData()


clf = DecisionTreeClassifier(min_samples_split=50)

# now your job is to fit the classifier
# using the training features/labels, and to
# make a set of predictions on the test data

clf.fit(features_train, labels_train)

# store your predictions in a list named pred

pred = clf.predict(features_test)

prettyPicture(clf, features_test, labels_test, f_name="dec_tree.png")
Image.open('dec_tree.png').show()

acc = accuracy_score(pred, labels_test)
print "Decision Tree accuracy: %r" % acc

"""
	clf = DecisionTreeClassifier(min_samples_split=2)
	clf.fit(features_train, labels_train)
	pred = clf.predict(features_test)
	acc_min_samples_split_2 = accuracy_score(pred, labels_test)

	clf = DecisionTreeClassifier(min_samples_split=50)
	clf.fit(features_train, labels_train)
	pred = clf.predict(features_test)
	acc_min_samples_split_50 = accuracy_score(pred, labels_test)
"""
Example #38
### Predicting
print 'start predicting...'
t2 = time()
labels_pred = clf.predict(features_test)
str2 = "predicting time: " + str(round(time() - t2, 3)) + "s"
print str2 + '\n'

### Accuracy
from sklearn.metrics import accuracy_score

accuracy = accuracy_score(labels_test, labels_pred)
print 'accuracy: ' + str(accuracy)

### Write to Log

file = open('log.txt', 'a')
file.write('******************************************************\n\n')
file.write('DateTime: ' + date_time.strftime("%m/%d/%y %H:%M:%S") + '\n\n')
file.write('Method: ' + str(clf) + '\n\n')
file.write(str1 + '\n')
file.write(str2 + '\n')
file.write('Accuracy: ' + str(accuracy) + '\n\n\n')

### Predicted boundary

try:
    prettyPicture(clf, features_test, labels_test,
                  date_time.strftime("%m%d%y_%H%M%S"))
except NameError:
    pass
import pylab as pl
from classifyDT import classify

features_train, labels_train, features_test, labels_test = makeTerrainData()

from sklearn import tree
from sklearn.metrics import accuracy_score

clf_split_2 = tree.DecisionTreeClassifier(min_samples_split = 2)
clf_split_50 = tree.DecisionTreeClassifier(min_samples_split = 50)

clf_split_2.fit(features_train, labels_train)
clf_split_50.fit(features_train, labels_train)

pred_split_2 = clf_split_2.predict(features_test)
pred_split_50 = clf_split_50.predict(features_test)

acc_min_samples_split_2 = accuracy_score(pred_split_2, labels_test)
acc_min_samples_split_50 = accuracy_score(pred_split_50, labels_test)
### be sure to compute the accuracy on the test set

def submitAccuracies():
  return {"acc_min_samples_split_2": round(acc_min_samples_split_2, 3),
          "acc_min_samples_split_50": round(acc_min_samples_split_50, 3)}

#### grader code, do not modify below this line

prettyPicture(clf_split_2, features_test, labels_test)
output_image("test.png", "png", open("test.png", "rb").read())
prettyPicture(clf_split_50, features_test, labels_test)
output_image("test.png", "png", open("test.png", "rb").read())
Example #40
clf = DecisionTreeClassifier(min_samples_split=40)
t0 = time()
### features_train = features_train[:int(len(features_train)/100)]
### labels_train = labels_train[:int(len(labels_train)/100)]
clf.fit(features_train, labels_train)
print("Time to train:", round(time() - t0, 3), "s")

t0 = time()
pred = clf.predict(features_test)
print("Time to make prediction:", round(time() - t0, 3), "s")

### calculate and return the accuracy on the test data
from sklearn.metrics import accuracy_score
accuracy = accuracy_score(labels_test, pred)
print("Accuracy of Decision Tree predictor is: {}".format(accuracy))
##################################################################

### draw the decision boundary with the test points overlaid
### we only take the first two features.
new_features_train = features_train[:, :2]
new_features_test = features_test[:, :2]
new_clf = DecisionTreeClassifier(min_samples_split=40)
new_clf.fit(new_features_train, labels_train)
import matplotlib.pyplot as plt
plt = prettyPicture(new_clf, new_features_test, labels_test)
plt.show()
###output_image("test.png", "png", open("test.png", "rb").read())

#########################################################
#########################################################
Example #41
y_fit = clf.fit(features_train, labels_train)

pred = y_fit.predict(features_test)

#### store your predictions in a list named pred


### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]

### draw the decision boundary with the test points overlaid
image_name = "SVM_"+kernel_type+"_cval_"+ str(cval)
prettyPicture(clf, features_test, labels_test, image_name)
#output_image(image_name, "png", open(image_name+".png", "rb").read())



from sklearn.metrics import accuracy_score
acc = accuracy_score(pred, labels_test)

print acc


def submitAccuracy():
    return acc
Example #42
for algorithmParam in algorithmParamArray:
    # Train Data
    print "*******Algortihm used:", algorithmParam
    print "----------Training Phase (KNeighbors)---------"
    t0 = time()
    clfKNN=KNeighborsClassifier(n_neighbors=10,algorithm=algorithmParam)
    clfKNN.fit(features_train, labels_train)
    print "training time (KNeighbors):", round(time()-t0, 3), "s"
    
    # Test Data
    print "----------Testing Phase (KNeighbors)---------"
    accuracy=clfKNN.score(features_test,labels_test)
    print "Accuracy (KNeighbors):", accuracy, '\n'
    try:
        prettyPicture(clfKNN, features_test, labels_test)
    except NameError:
        pass


##2.- Using Random Forest
# Parameters
#algorithmParamArray=['auto','ball_tree','kd_tree','brute']
algorithmParam='Random Forest' 
# Train Data
print "*******Algortihm used:", algorithmParam
print "----------Training Phase (Random Forest)----"
t0 = time()
clfRF=RandomForestClassifier(n_estimators=10)
clfRF.fit(features_train, labels_train)
print "training time (Random Forest):", round(time()-t0, 3), "s"
Example #43
features_train, labels_train, features_test, labels_test = makeTerrainData()

### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]

#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
plt.scatter(bumpy_fast, grade_fast, color = "b", label="fast")
plt.scatter(grade_slow, bumpy_slow, color = "r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()

clf = KNeighborsClassifier()
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
acc = accuracy_score(pred, labels_test)
print "Decision Tree accuracy: %r" % acc

try:
    prettyPicture(clf, features_test, labels_test, f_name="knn.png")
except NameError:
    pass
Example #44
print("tempo de predição:", round(time() - t0, 3), "s")

acc = accuracy_score(labels_test, pred_adaboost)
print(acc)

print("RandomForest =======================")
from sklearn.ensemble import RandomForestClassifier
rnd_clf = RandomForestClassifier(n_estimators=100,
                                 max_leaf_nodes=4,
                                 n_jobs=2,
                                 random_state=0)

t0 = time()
rnd_clf = rnd_clf.fit(features_train, labels_train)
print("tempo de treinamento:", round(time() - t0, 3), "s")

t0 = time()
pred_rnd = rnd_clf.predict(features_test)
print("tempo de predição:", round(time() - t0, 3), "s")

#print(rnd_clf.predict_proba(features_test))[0:10]

acc = accuracy_score(labels_test, pred_rnd)
print(acc)

try:
    #prettyPicture(clf, features_test, labels_test)
    prettyPicture(clf_adaboost, features_test, labels_test)
except NameError:
    pass
Example #45
### visualization code (prettyPicture) to show you the decision boundary

from sklearn.ensemble import AdaBoostClassifier
from sklearn.ensemble import AdaBoostRegressor
from sklearn.metrics import accuracy_score
from sklearn.metrics import r2_score


# abc = AdaBoostClassifier(base_estimator=None, n_estimators=50, learning_rate=1.0,
#                         algorithm='SAMME.R', random_state=None)
#
# abc.fit(features_train, labels_train)
# predicted = abc.predict(features_test)
# accuracy = accuracy_score(labels_test, predicted)
# print accuracy


abr = AdaBoostRegressor(base_estimator=None, n_estimators=500, learning_rate=1.0,
                  loss='linear', random_state=None)
abr.fit(features_train, labels_train)
predicted_test = abr.predict(features_test)
test_score = r2_score(labels_test, predicted_test)


print test_score


try:
    prettyPicture(abr, features_test, labels_test)
except NameError:
    pass
Example #46
### and testing datasets, respectively
### labels_train and labels_test are the corresponding item labels
features_train, features_test, labels_train, labels_test = preprocess()

#########################################################
### your code goes here ###
from sklearn import tree
clf = tree.DecisionTreeClassifier(min_samples_split=40)

t0 = time()
clf.fit(features_train, labels_train)
print "training time:", round(
    time() - t0,
    3), "s"  #round() function rounding up the time to 3 decimal places

t0 = time()
pred = clf.predict(features_test)
print "predecting time:", round(time() - t0, 3), "s"

from sklearn.metrics import accuracy_score
acc = accuracy_score(labels_test, pred)
print(acc)
print(len(features_train[0]))

# visualization of the decision tree
from class_vis import output_image, prettyPicture
prettyPicture(clf, features_train, labels_train)
output_image("test.png", "png", open("test.png", "rb")).read()

#########################################################
Example #47
def display_picture(clf):
    try:
       prettyPicture(clf, features_test, labels_test)
    except NameError:
       pass
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii] == 0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii] == 0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii] == 1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii] == 1]

#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(grade_slow, bumpy_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()
################################################################################

### your code here!  name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary

##### K nearestNEIGHBOR ALGORITHM

function_name = "KNeighborsRegressor"
#clf = KNeighborsRegressor(n_neighbors=2,leaf_size=1)
clf = KNeighborsRegressor()
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
pred_mat = [round(p) for p in pred]
pred_rate = accuracy_score(pred_mat, labels_test)
print pred_rate

### draw the decision boundary with the test points overlaid
prettyPicture(function_name, clf, features_test, labels_test)
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics

from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture, output_image


features_train, labels_train, features_test, labels_test = makeTerrainData()

### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]


NBclassifier = GaussianNB()
NBclassifier.fit(features_train, labels_train)

NBprediction = NBclassifier.predict(features_test)
print(metrics.accuracy_score(labels_test, NBprediction))
#print NBclassifier.score(features_test, labels_test)

### draw the decision boundary with the test points overlaid
prettyPicture(NBclassifier, features_test, labels_test, "naive_bayes/naive_bayes.png")
output_image("naive_bayes/naive_bayes.png", "png", open("naive_bayes/naive_bayes.png", "rb").read())

def print_picture(prefix, clf):
    try:
        filename = prefix + ".png"
        prettyPicture(clf, features_test, labels_test, filename)
    except NameError:
        pass
Example #51
from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture, output_image
from ClassifyNB import classify

import numpy as np
import pylab as pl

features_train, labels_train, features_test, labels_test = makeTerrainData()

### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii] == 0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii] == 0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii] == 1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii] == 1]

# You will need to complete this function imported from the ClassifyNB script.
# Be sure to change to that code tab to complete this quiz.
classifier = classify(features_train, labels_train)
print classifier.score(features_test, labels_test)

### draw the decision boundary with the test points overlaid
prettyPicture(classifier, features_test, labels_test)
# output_image("test.png", "png", open("test.png", "rb").read())




Example #52
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii] == 0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii] == 0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii] == 1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii] == 1]

#### initial visualization
plt.xlim(0.0, 1.0)
plt.ylim(0.0, 1.0)
plt.scatter(bumpy_fast, grade_fast, color="b", label="fast")
plt.scatter(grade_slow, bumpy_slow, color="r", label="slow")
plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()
################################################################################

### your code here!  name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary
from sklearn.neighbors import KDTree
kdt = KDTree(features_train, leaf_size=30, metric='euclidean')
t0 = time()
kdt.query(features_train, k=2, return_distance=False)
print("training time:", round(time() - t0, 3), "s")

# KDTree has no score() or predict(), so the original call below fails
# with AttributeError:
# t0 = time()
# print(kdt.score(features_test))

try:
    #    prettyPicture(clf, features_test, labels_test)
    prettyPicture(kdt, features_test, labels_test)
except NameError:
    pass
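
# KDTree above is a neighbor-search structure, not a classifier, so the
# prettyPicture(kdt, ...) call raises AttributeError, which the NameError
# handler does not catch. A sketch of the presumably intended classifier,
# assuming the same globals:
from sklearn.neighbors import KNeighborsClassifier

knn = KNeighborsClassifier(n_neighbors=2)
knn.fit(features_train, labels_train)
print("KNN accuracy:", knn.score(features_test, labels_test))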
Example #53
clf = KNeighborsClassifier(n_neighbors=1)

t0 = time()
clf.fit(features_train, labels_train)
print "training time:", round(time()-t0, 3), "s"

t0 = time()
prediction = clf.predict(features_test)
print "prediction time:", round(time()-t0, 3), "s"

from sklearn.metrics import accuracy_score
accuracy = accuracy_score(prediction, labels_test)
print "Accuracy:", accuracy

try:
    prettyPicture(clf, features_test, labels_test, "KNN.png")
except NameError:
    pass

### Adaboost
print "Adaboost:"
from sklearn.ensemble import AdaBoostClassifier
clf = AdaBoostClassifier()

t0 = time()
clf.fit(features_train, labels_train)
print "training time:", round(time()-t0, 3), "s"

t0 = time()
prediction = clf.predict(features_test)
print "prediction time:", round(time()-t0, 3), "s"
Example #54
# sklearn.metrics.accuracy_score(pred,labels_test), accuracy method used by instructor in video
# accuracy of Naive Bayes Terrain Classifier method 3 of 3
myGaussianNBTerrainClassifierAccuracy = sklearn.metrics.accuracy_score(
    pred, labels_test)
print("\tmyGaussianNBTerrainClassifierAccuracy - {}".format(
    myGaussianNBTerrainClassifierAccuracy))
# print("\ttype(myGaussianNB_Classifier_Accuracy) - {}\n".format(type(myGaussianNB_Classifier_Accuracy)))

# sklearn.metrics.accuracy_score(pred,labels_test), accuracy method used by instructor in video
# accuracy of Support Vector Machines - SVM - Terrain Classifier method 3 of 3
SupportVectorMachinesSVMTerrainClassifierAccuracy = sklearn.metrics.accuracy_score(
    SVMpred, labels_test)
print("\tSupportVectorMachinesSVMTerrainClassifierAccuracy - {}\n".format(
    SupportVectorMachinesSVMTerrainClassifierAccuracy))
# print("\ttype(SupportVectorMachinesSVMTerrainClassifierAccuracy) - {}\n".format(type(SupportVectorMachinesSVMTerrainClassifierAccuracy)))

### draw Naive Bayes Gaussian Classifier
### draw the decision boundary with the test points overlaid
myPrettyPicture = prettyPicture(clf, features_test, labels_test)

### draw SVM SupportVectorMachines Classifier
### draw the decision boundary with the test points overlaid
myPrettyPicture = prettyPicture(SVMclf, features_test, labels_test)

# print("\ttype(myPrettyPicture) - {}\n".format(type(myPrettyPicture)))

# output_image("test.png", "png", open('/Users/Menfi/Documents/workspace/zzzzz/src/test.png', "rb").read())

print('End studentMain.py')
Example #55
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.show()
#################################################################################


### your code here!  name your classifier object clf if you want the 
### visualization code (prettyPicture) to show you the decision boundary


### importing random forest classfier and accuracy
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score


### declare, train and predict classifier
clf = RandomForestClassifier(n_estimators = 5, min_samples_split = 30, random_state = 90)
clf.fit(features_train,labels_train)
pred = clf.predict(features_test)


### print accuracy
acc = accuracy_score(labels_test,pred)
print acc


try:
    prettyPicture(clf, features_test, labels_test)
except NameError:
    pass
Example #56
from class_vis import prettyPicture, output_image
from prep_terrain_data import makeTerrainData

import matplotlib.pyplot as plt
import numpy as np
import pylab as pl

features_train, labels_train, features_test, labels_test = makeTerrainData()

from sklearn.svm import SVC
clf = SVC(C=1.0, kernel="rbf")

clf.fit(features_train, labels_train)

pred = clf.predict(features_test)

from sklearn.metrics import accuracy_score
acc = accuracy_score(pred, labels_test)

print "accuracy: ", acc

prettyPicture(clf, features_test, labels_test, "test1.png")
Example #57
#plt.show()

#################################################################################


### your code here!  name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary

n_neighbors = 15
print "Loading %iNN library" % n_neighbors
from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(n_neighbors)
print "Training algorithm"
clf.fit(features_train,labels_train)
print "Predicting results"
pred = clf.predict(features_test)

print "Computing algorithm accuracy"
from sklearn.metrics import accuracy_score
acc = accuracy_score(pred, labels_test)
print "Accuracy: %.4f" % acc
# Accuracy 93.6% for 3NN
# Accuracy 94.0% for 4NN, but even k values shouldn't be used (risk of ties)! Why is it better?
# Accuracy 92.0% for 5NN
# Accuracy 93.6% for 7NN
outputfile = "test_%iNN.png" % n_neighbors
print "Saving output plot as %s" % outputfile
prettyPicture(clf, features_test, labels_test,outputfile)
output_image(outputfile, "png", open(outputfile, "rb").read())
Example #58

from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture, output_image
from ClassifyNB import classify

import numpy as np
import pylab as pl


features_train, labels_train, features_test, labels_test = makeTerrainData()

### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]


# You will need to complete this function imported from the ClassifyNB script.
# Be sure to change to that code tab to complete this quiz.
clf = classify(features_train, labels_train)



### draw the decision boundary with the test points overlaid
prettyPicture(clf, features_test, labels_test, "test_100.png")
#output_image("test.png", "png", open("test.png", "rb").read())
Example #59
def classify(features_train, labels_train):
    ### import the sklearn module for GaussianNB
    from sklearn.naive_bayes import GaussianNB
    ### create classifier
    clf = GaussianNB()
    ### fit the classifier on the training features and labels
    clf.fit(features_train, labels_train)
    ### return the fit classifier
    return clf
import numpy as np
import pylab as pl


features_train, labels_train, features_test, labels_test = makeTerrainData()

### the training data (features_train, labels_train) have both "fast" and "slow" points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]


# You will need to complete this function imported from the ClassifyNB script.
# Be sure to change to that code tab to complete this quiz.
clf = classify(features_train, labels_train)



### draw the decision boundary with the test points overlaid
prettyPicture(clf, features_test, labels_test)
output_image("test.png", "png", open("test.png", "rb").read())
Example #60
### your code here!  name your classifier object clf if you want the 
### visualization code (prettyPicture) to show you the decision boundary
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import AdaBoostClassifier, RandomForestClassifier

clfKNN = KNeighborsClassifier(n_neighbors=9, weights='uniform', algorithm='auto', leaf_size=30, p=1, metric='minkowski', metric_params=None, n_jobs=1)
clfADA = AdaBoostClassifier(base_estimator=None, n_estimators=50, learning_rate=0.1, algorithm='SAMME.R', random_state=None)
clfRFC = RandomForestClassifier(n_estimators=10, criterion='gini', max_depth=None, min_samples_split=2, min_samples_leaf=1, min_weight_fraction_leaf=0.0, max_features='auto', max_leaf_nodes=None, bootstrap=True, oob_score=False, n_jobs=1, random_state=None, verbose=0, warm_start=False, class_weight=None)
which = raw_input("Enter the classifier to use: ")
#which = "ADA"
if which == "KNN":
    clfKNN.fit(features_train, labels_train)
    print "KNN Accuracy = ", clfKNN.score(features_test, labels_test)
    clf = clfKNN
    prettyPicture(clfKNN, features_test, labels_test)
elif which == "ADA":
    clfADA.fit(features_train, labels_train)
    clf = clfADA
    print "Adaboost Accuracy = ", clfADA.score(features_test, labels_test)
    prettyPicture(clfADA, features_test, labels_test)
else:
    clfRFC.fit(features_train, labels_train)
    clf = clfRFC
    print "RandomForestClassifier Accuracy = ", clfRFC.score(features_test, labels_test)
    prettyPicture(clfRFC, features_test, labels_test)


try:
    prettyPicture(clf, features_test, labels_test)
except NameError:
    pass