Exemplo n.º 1
0
def classifyNB():
    """Train the classifier via ``NB_classify`` and emit its decision-boundary image.

    Takes no arguments: ``features_train``, ``labels_train``,
    ``features_test`` and ``labels_test`` are read from the enclosing
    (module) scope and forwarded to ``NB_classify``. The classifier it returns
    is drawn with ``prettyPicture`` and the bytes of ``test.png`` are handed
    to ``output_image``. Returns ``None``.
    """
    clf = NB_classify(features_train, labels_train, features_test, labels_test)

    # Draw the decision boundary with the test points overlaid.
    # NOTE(review): presumably prettyPicture writes "test.png" -- confirm.
    prettyPicture(clf, features_test, labels_test)

    # Read the image inside a context manager so the file handle is closed
    # deterministically instead of being left to the garbage collector.
    with open("test.png", "rb") as image_file:
        image_bytes = image_file.read()
    output_image("test.png", "png", image_bytes)
Exemplo n.º 2
0
def submitClassify():
    """Fit a classifier with ``classify`` and, when possible, emit its plot.

    ``features_train``, ``labels_train``, ``features_test`` and
    ``labels_test`` are read from the enclosing (module) scope. Returns
    ``None``.
    """
    clf = classify(features_train, labels_train)

    # Visualisation is best effort: a NameError raised anywhere in this step
    # (for example when the plotting helpers were never imported) is ignored
    # on purpose so the classification itself still counts.
    try:
        # Draw the decision boundary with the test points overlaid.
        prettyPicture(clf, features_test, labels_test)
        # Context manager closes the image file as soon as it has been read.
        with open("test.png", "rb") as image_file:
            image_bytes = image_file.read()
        output_image("test.png", "png", image_bytes)
    except NameError:
        pass
Exemplo n.º 3
0
def intro():
    """Fit a linear regression on a four-point toy data set and plot it.

    The model is trained on hard-coded training points, reported through
    ``getInfo`` together with three hard-coded test points, drawn with
    ``plt.scatter`` and ``plot_graph``, and finally the bytes of ``test.png``
    are passed to ``output_image``. Returns ``None``.
    """
    clf = linear_model.LinearRegression()

    # Training data: one feature per sample, hence the nested lists.
    X_train = np.array([[1], [1.5], [4], [6]])
    y_train = np.array([1.5, 2, 3, 5])

    # Held-out points handed to the reporting and plotting helpers.
    X_test = [[0], [2], [3]]
    y_test = [0, 2, 2.5]

    clf.fit(X_train, y_train)

    getInfo(clf, X_train, y_train, X_test, y_test)

    plt.scatter(X_train, y_train)
    # NOTE(review): presumably plot_graph saves the figure as "test.png" -- confirm.
    plot_graph(clf, X_train, y_train, X_test, y_test, "X", "y")

    # Close the image file deterministically once its bytes have been read.
    with open("test.png", "rb") as image_file:
        image_bytes = image_file.read()
    output_image("test.png", "png", image_bytes)
Exemplo n.º 4
0
def _to_float_if_numeric(text):
    """Return ``text`` converted to ``float``, or ``text`` unchanged if it is not numeric."""
    try:
        return float(text)
    except ValueError:
        # Keeps keyword values such as 'auto' usable as typed.
        return text


def main():
    """Interactively train a classifier on the terrain data and display the result.

    Prompts for a kernel name and, for any kernel other than ``'linear'``,
    for gamma and C. The values are forwarded to ``classify``; the test-set
    accuracy is printed, the decision boundary is drawn with
    ``prettyPicture``, the bytes of ``naive_bayes.png`` are passed to
    ``output_image`` and the image is opened with the ``display`` command.
    Returns ``None``.
    """
    features_train, labels_train, features_test, labels_test = makeTerrainData()

    # raw_input only exists on Python 2; fall back to input() on Python 3.
    try:
        read_line = raw_input
    except NameError:
        read_line = input

    # Defaults used when the linear kernel is selected.
    gamma, c = 'auto', 1.0

    kernel = read_line('Select the kernel: ')
    if kernel != 'linear':
        # Console input arrives as text; numeric hyper-parameters must be
        # numbers, so convert them and leave keywords like 'auto' untouched.
        gamma = _to_float_if_numeric(read_line('Gamma: '))
        c = _to_float_if_numeric(read_line('C: '))

    clf = classify(features_train, labels_train, kernel, c, gamma)
    print('Python SVM Example')

    accuracy = clf.score(features_test, labels_test)
    print('Accuracy score: {}'.format(accuracy))

    # Draw the decision boundary with the test points overlaid.
    # NOTE(review): presumably prettyPicture writes "naive_bayes.png" -- confirm.
    prettyPicture(clf, features_test, labels_test)

    # Close the image file deterministically once its bytes have been read.
    with open("naive_bayes.png", "rb") as image_file:
        image_bytes = image_file.read()
    output_image("naive_bayes.png", "png", image_bytes)

    # Open the image in the background with the external "display" viewer.
    os.system('display naive_bayes.png &')
Exemplo n.º 5
0
from ClassifyDT import classify, DTAccuracy


import numpy as np
import pylab as pl


# Load the terrain data, already split into training and test portions.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# The training data mixes "fast" (label 0) and "slow" (label 1) points.
# Collect the indices of each class once, then pull out both coordinates so
# the two classes can be given different colors in a scatterplot.
fast_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 0]
slow_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 1]

grade_fast = [features_train[ii][0] for ii in fast_idx]
bumpy_fast = [features_train[ii][1] for ii in fast_idx]
grade_slow = [features_train[ii][0] for ii in slow_idx]
bumpy_slow = [features_train[ii][1] for ii in slow_idx]


# classify and DTAccuracy both come from the ClassifyDT module.
clf = classify(features_train, labels_train)
accuracy = DTAccuracy(features_train, labels_train, features_test, labels_test)
print(accuracy)


# Draw the decision boundary with the test points overlaid.
prettyPicture(clf, features_test, labels_test)
output_image("D:/machine_learning/git/decision_tree/test.png")


Exemplo n.º 6
0
"""
from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture, output_image
from ClassifyNB import classify

import numpy as np
import pylab as pl


features_train, labels_train, features_test, labels_test = makeTerrainData()

### the training data (features_train, labels_train) have both fast and slow points mixed
### in together--separate them so we can give them different colors in the scatterplot,
### and visually identify them
grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0]
bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0]
grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1]
bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1]


# You will need to complete this function imported from the ClassifyNB script.
# Be sure to change to that code tab to complete this quiz.
clf = classify(features_train, labels_train)



### draw the decision boundary with the text points overlaid
prettyPicture(clf, features_test, labels_test)
output_image('test.png', 'png', open('test.png', 'rb').read())
Exemplo n.º 7
0
from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture, output_image
from ClassifyNB import classify

import numpy as np
import pylab as pl

# Load the terrain data, already split into training and test portions.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# Separate the "fast" (label 0) and "slow" (label 1) training points so they
# can be given different colors in a scatterplot.
fast_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 0]
slow_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 1]

grade_fast = [features_train[ii][0] for ii in fast_idx]
bumpy_fast = [features_train[ii][1] for ii in fast_idx]
grade_slow = [features_train[ii][0] for ii in slow_idx]
bumpy_slow = [features_train[ii][1] for ii in slow_idx]

clf = classify(features_train, labels_train)

# Draw the decision boundary with the test points overlaid.
prettyPicture(clf, features_test, labels_test)

# output_image is given the raw bytes of the image everywhere else it is
# called with three arguments, so read the PNG in binary mode instead of
# passing a text-mode file object, and close the file once it has been read.
with open("test.png", "rb") as image_file:
    image_bytes = image_file.read()
output_image("test.png", "png", image_bytes)
Exemplo n.º 8
0
#!/usr/bin/python

from terrain_data import makeTerrainData
from class_vis import prettyPicture, output_image
from ClassifyNB import classify

import numpy as np
import pylab as pl


# Load the terrain data, already split into training and test portions.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# Separate the "fast" (label 0) and "slow" (label 1) training points so they
# can be given different colors in a scatterplot.
fast_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 0]
slow_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 1]

grade_fast = [features_train[ii][0] for ii in fast_idx]
bumpy_fast = [features_train[ii][1] for ii in fast_idx]
grade_slow = [features_train[ii][0] for ii in slow_idx]
bumpy_slow = [features_train[ii][1] for ii in slow_idx]


clf = classify(features_train, labels_train)


# Draw the decision boundary with the test points overlaid.
# NOTE(review): presumably prettyPicture writes "GaussianDecisionBoundary.png"
# in this project -- confirm.
prettyPicture(clf, features_test, labels_test)

# Close the image file deterministically once its bytes have been read.
with open("GaussianDecisionBoundary.png", "rb") as image_file:
    image_bytes = image_file.read()
output_image("GaussianDecisionBoundary", "png", image_bytes)
Exemplo n.º 9
0
# Load the terrain data, already split into training and test portions.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# The training data mixes "fast" (label 0) and "slow" (label 1) points.
# Collect the indices of each class once, then pull out both coordinates so
# the two classes can be given different colors in a scatterplot.
fast_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 0]
slow_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 1]

grade_fast = [features_train[ii][0] for ii in fast_idx]
bumpy_fast = [features_train[ii][1] for ii in fast_idx]
grade_slow = [features_train[ii][0] for ii in slow_idx]
bumpy_slow = [features_train[ii][1] for ii in slow_idx]

# Fit the classifier on the training portion.
clf = classify(features_train, labels_train)

# Draw the decision boundary with the test points overlaid.
prettyPicture(clf, features_test, labels_test)
output_image("test.png")
Exemplo n.º 10
0
#################################################################################

### your code here!  name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary

# k-nearest-neighbours run. The single-argument print(...) form behaves the
# same on Python 2 and Python 3.
n_neighbors = 15
print("Loading %iNN library" % n_neighbors)
from sklearn.neighbors import KNeighborsClassifier

clf = KNeighborsClassifier(n_neighbors)
print("Training algorithm")
clf.fit(features_train, labels_train)
print("Predicting results")
pred = clf.predict(features_test)

print("Computing algorithm accuracy")
from sklearn.metrics import accuracy_score

# accuracy_score is documented as (y_true, y_pred); the value is the same
# either way, but the documented order avoids confusion with other metrics.
acc = accuracy_score(labels_test, pred)
print("Accuracy: %.4f" % acc)
# Results recorded by the original author:
#   3NN -> 93.6%
#   4NN -> 94.0% (author's note: multiples of 2 shouldn't be used, yet this
#                 scored best -- left as an open question)
#   5NN -> 92.0%
#   7NN -> 93.6%
outputfile = "test_%iNN.png" % n_neighbors
print("Saving output plot as %s" % outputfile)
prettyPicture(clf, features_test, labels_test, outputfile)

# Close the image file deterministically once its bytes have been read.
with open(outputfile, "rb") as image_file:
    image_bytes = image_file.read()
output_image(outputfile, "png", image_bytes)
Exemplo n.º 11
0
# Train one decision tree per min_samples_split value, record its test
# accuracy in acc_samples and emit its decision-boundary image.
for sample in min_samples:

    clf = tree.DecisionTreeClassifier(min_samples_split=sample)
    clf = clf.fit(features_train, labels_train)

    pred = clf.predict(features_test)

    # accuracy_score is documented as (y_true, y_pred).
    accuracy = accuracy_score(labels_test, pred)

    acc_samples[f'acc_min_samples_split_{sample}'] = accuracy

    print(f'Accuracy for min_samples_split = {sample}: {accuracy}')

    # NOTE(review): presumably prettyPicture appends ".png" to pic_name, since
    # the file read back below is "test_<sample>.png" -- confirm.
    prettyPicture(clf, features_test, labels_test, pic_name=f'test_{sample}')

    # Close each image file as soon as it has been read, so handles do not
    # accumulate across iterations.
    image_path = f"test_{sample}.png"
    with open(image_path, "rb") as image_file:
        image_bytes = image_file.read()
    output_image(image_path, "png", image_bytes)

    print('\n')


def submit_accuracies():
    """Return the module-level ``acc_samples`` mapping.

    The returned object is the same one the loop in this script fills with
    ``acc_min_samples_split_<n>`` keys; it is not copied.
    """
    return acc_samples


# When run as a script, print whatever submit_accuracies() returns via pp
# (presumably a pretty-printer imported elsewhere in the file -- confirm).
if __name__ == "__main__":

    pp(submit_accuracies())
Exemplo n.º 12
0
def classifyDT(features_train, labels_train, features_test, labels_test):
    """Train a classifier via ``DT_classify`` and emit its decision-boundary image.

    Args:
        features_train: Training features, forwarded to ``DT_classify``.
        labels_train: Training labels, forwarded to ``DT_classify``.
        features_test: Test features, forwarded to ``DT_classify`` and plotted.
        labels_test: Test labels, forwarded to ``DT_classify`` and plotted.

    Returns:
        None. The image bytes are passed to ``output_image``.
    """
    clf = DT_classify(features_train, labels_train, features_test, labels_test)
    prettyPicture(clf, features_test, labels_test)

    # The bytes are read from "test.png" but reported under the name
    # "tree.png". NOTE(review): presumably prettyPicture writes "test.png"
    # and the different name is intentional -- confirm.
    with open("test.png", "rb") as image_file:
        image_bytes = image_file.read()
    output_image("tree.png", "png", image_bytes)
Exemplo n.º 13
0
# Load the terrain data, already split into training and test portions.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# Separate the "fast" (label 0) and "slow" (label 1) training points so they
# can be given different colors in a scatterplot.
fast_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 0]
slow_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 1]

grade_fast = [features_train[ii][0] for ii in fast_idx]
bumpy_fast = [features_train[ii][1] for ii in fast_idx]
grade_slow = [features_train[ii][0] for ii in slow_idx]
bumpy_slow = [features_train[ii][1] for ii in slow_idx]

# Fit the classifier on the training portion.
clf = classify(features_train, labels_train)

# Draw the decision boundary with the test points overlaid.
prettyPicture(clf, features_test, labels_test)

# Close the image file deterministically once its bytes have been read.
with open("test.png", "rb") as image_file:
    image_bytes = image_file.read()
output_image("test.png", "png", image_bytes)
Exemplo n.º 14
0
                color="r",
                label="slow",
                marker=marker,
                alpha=alpha)


# Plot the training points (faint crosses) and the test points (solid dots).
plot_xy(features_train, labels_train, marker='x', alpha=0.5)
plot_xy(features_test, labels_test, marker='o', alpha=1)

plt.legend()
plt.xlabel("bumpiness")
plt.ylabel("grade")
plt.title("training data")
# Save BEFORE showing: with interactive backends plt.show() blocks until the
# window is closed and the figure is discarded afterwards, so a later
# savefig() would write an empty image.
plt.savefig("initial.png")
plt.show()
output_image('initial.png')

################################################################################

# Report the sizes of both splits.
n_train = len(features_train)
n_test = len(features_test)
print(f'samples train = {n_train}, test = {n_test}')

# Percentage of label total over sample count for each split.
# NOTE(review): this sums the labels, so it counts label-1 samples; confirm
# that label 1 really means "fast" as the message says.
train_pct = sum(labels_train)*100.0/len(labels_train)
test_pct = sum(labels_test)*100.0/len(labels_test)
print(f'fast % train ={train_pct}, test ={test_pct}')

# your code here!  name your classifier object clf if you want the
# visualization code (prettyPicture) to show you the decision boundary

classifiers = [
    (
        DecisionTreeClassifier(min_samples_leaf=8, random_state=0),
        {
Exemplo n.º 15
0
# Separate the "fast" (label 0) and "slow" (label 1) training points so they
# can be given different colors in a scatterplot.
fast_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 0]
slow_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 1]

grade_fast = [features_train[ii][0] for ii in fast_idx]
bumpy_fast = [features_train[ii][1] for ii in fast_idx]
grade_slow = [features_train[ii][0] for ii in slow_idx]
bumpy_slow = [features_train[ii][1] for ii in slow_idx]


clf = classify(features_train, labels_train)


# Draw the decision boundary with the test points overlaid.
prettyPicture(clf, features_test, labels_test)

# Close the image file deterministically once its bytes have been read.
with open("test.png", "rb") as image_file:
    image_bytes = image_file.read()
output_image("test.png", "png", image_bytes)


# Accuracy, computed three equivalent ways.
# Method 1: fraction of test labels that equal the prediction.
pred = clf.predict(features_test)
accuracy = sum(labels_test == pred) / float(len(labels_test))
print("Accuracy is: ", accuracy)

# Method 2: sklearn's helper, called in its documented (y_true, y_pred) order.
from sklearn.metrics import accuracy_score
print("Accuracy is: ", accuracy_score(labels_test, pred))

# Method 3: the classifier's own score() method.
print("Accuracy is: ", clf.score(features_test, labels_test))
Exemplo n.º 16
0
# Separate the "fast" (label 0) and "slow" (label 1) training points so they
# can be given different colors in a scatterplot.
fast_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 0]
slow_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 1]

grade_fast = [features_train[ii][0] for ii in fast_idx]
bumpy_fast = [features_train[ii][1] for ii in fast_idx]
grade_slow = [features_train[ii][0] for ii in slow_idx]
bumpy_slow = [features_train[ii][1] for ii in slow_idx]

# NBAccuracy returns both the fitted classifier and its accuracy.
clf, accuracy = NBAccuracy(features_train, labels_train, features_test,
                           labels_test)

# Draw the decision boundary with the test points overlaid.
prettyPicture(clf, features_test, labels_test)

# The file name, format and open mode are string literals; written bare
# (test.png, png, rb) they are undefined names and raise NameError.
with open("test.png", "rb") as image_file:
    image_bytes = image_file.read()
output_image("test.png", "png", image_bytes)

# Single-argument print(...) works on both Python 2 and Python 3.
print(accuracy)
Exemplo n.º 17
0
# Load the terrain data, already split into training and test portions.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# --- Run 1: classify() with its default third argument -------------------
# NOTE(review): the message below implies the default is min_samples_split=2;
# confirm against classify() in the decision-tree module.
clf = classify(features_train, labels_train)

# predict
pred = clf.predict(features_test)

# compute accuracy
acc = clf.score(features_test, labels_test)

# str.format keeps the output identical on Python 2 and Python 3.
print("accuracy for min_sample_split=2: {}".format(acc))

# build the scatter plot, save it to the file and emit its bytes
prettyPicture(clf, features_test, labels_test, "test_min_sample_split2.png")
with open("test_min_sample_split2.png", "rb") as image_file:
    image_bytes = image_file.read()
output_image("test_min_sample_split2.png", "png", image_bytes)

# --- Run 2: classify() with 50 as its third argument ----------------------
clf = classify(features_train, labels_train, 50)

# predict
pred = clf.predict(features_test)

# compute accuracy
acc = clf.score(features_test, labels_test)

print("accuracy for min_sample_split=50: {}".format(acc))

# build the scatter plot and emit its bytes
# NOTE(review): presumably prettyPicture defaults to "test.png" -- confirm.
prettyPicture(clf, features_test, labels_test)
with open("test.png", "rb") as image_file:
    image_bytes = image_file.read()
output_image("test.png", "png", image_bytes)
Exemplo n.º 18
0
#plt.show()

#################################################################################


### your code here!  name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary

# k-nearest-neighbours run. The single-argument print(...) form behaves the
# same on Python 2 and Python 3.
n_neighbors = 15
print("Loading %iNN library" % n_neighbors)
from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(n_neighbors)
print("Training algorithm")
clf.fit(features_train, labels_train)
print("Predicting results")
pred = clf.predict(features_test)

print("Computing algorithm accuracy")
from sklearn.metrics import accuracy_score
# accuracy_score is documented as (y_true, y_pred); the value is the same
# either way, but the documented order avoids confusion with other metrics.
acc = accuracy_score(labels_test, pred)
print("Accuracy: %.4f" % acc)
# Results recorded by the original author:
#   3NN -> 93.6%
#   4NN -> 94.0% (author's note: multiples of 2 shouldn't be used, yet this
#                 scored best -- left as an open question)
#   5NN -> 92.0%
#   7NN -> 93.6%
outputfile = "test_%iNN.png" % n_neighbors
print("Saving output plot as %s" % outputfile)
prettyPicture(clf, features_test, labels_test, outputfile)

# Close the image file deterministically once its bytes have been read.
with open(outputfile, "rb") as image_file:
    image_bytes = image_file.read()
output_image(outputfile, "png", image_bytes)
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics

from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture, output_image


# Load the terrain data, already split into training and test portions.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# Separate the "fast" (label 0) and "slow" (label 1) training points so they
# can be given different colors in a scatterplot.
fast_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 0]
slow_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 1]

grade_fast = [features_train[ii][0] for ii in fast_idx]
bumpy_fast = [features_train[ii][1] for ii in fast_idx]
grade_slow = [features_train[ii][0] for ii in slow_idx]
bumpy_slow = [features_train[ii][1] for ii in slow_idx]


# Fit a Gaussian Naive Bayes model and report its accuracy on the test split.
NBclassifier = GaussianNB()
NBclassifier.fit(features_train, labels_train)

NBpreditiction = NBclassifier.predict(features_test)
print(metrics.accuracy_score(labels_test, NBpreditiction))

# Draw the decision boundary with the test points overlaid.
prettyPicture(NBclassifier, features_test, labels_test, "naive_bayes/naive_bayes.png")

# Close the image file deterministically once its bytes have been read.
with open("naive_bayes/naive_bayes.png", "rb") as image_file:
    image_bytes = image_file.read()
output_image("naive_bayes/naive_bayes.png", "png", image_bytes)

Exemplo n.º 20
0
        min_impurity_split=None,
        class_weight=None,
        presort='deprecated',
        ccp_alpha=0.0)
    clf.fit(features_train,
            labels_train,
            sample_weight=None,
            check_input=True,
            X_idx_sorted=None)
    return clf


# Fit the classifier on the training split.
clf = classify(features_train, labels_train)

# Accuracy on the test split, taken directly from the classifier's score()
# method. The explicit alternative would be:
#   pred = clf.predict(features_test)
#   acc = accuracy_score(pred, labels_test)
acc = clf.score(features_test, labels_test)


def submitAccuracy():
    """Return the module-level ``acc`` value (the result of ``clf.score``)."""
    return acc


# Show the accuracy computed above.
print(acc)

#### grader code, do not modify below this line

# Draw the decision boundary with the test points overlaid, then emit the image.
prettyPicture(clf, features_test, labels_test)
output_image('test.png')
Exemplo n.º 21
0
import sys
from svm_classifier import classify
sys.path.append("../tools/")
from prep_terrain_data import make_terrain_data
from class_vis import pretty_picture, output_image


# Load the terrain data, already split into training and test portions.
features_train, labels_train, features_test, labels_test = make_terrain_data()

# classify returns both the fitted classifier and its accuracy.
clf, accuracy = classify(features_train, labels_train, features_test, labels_test)

# Draw the decision boundary with the test points overlaid.
pretty_picture(clf, features_test, labels_test, "svm_speed.png")

# Close the image file deterministically once its bytes have been read.
with open("svm_speed.png", "rb") as image_file:
    image_bytes = image_file.read()
output_image("svm_speed.png", "png", image_bytes)
Exemplo n.º 22
0
# Load the terrain data, already split into training and test portions.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# Separate the "fast" (label 0) and "slow" (label 1) training points so they
# can be given different colors in a scatterplot.
fast_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 0]
slow_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 1]

grade_fast = [features_train[ii][0] for ii in fast_idx]
bumpy_fast = [features_train[ii][1] for ii in fast_idx]
grade_slow = [features_train[ii][0] for ii in slow_idx]
bumpy_slow = [features_train[ii][1] for ii in slow_idx]

# Fit the classifier on the training portion.
clf = classify(features_train, labels_train)

# Draw the decision boundary with the test points overlaid.
# NOTE(review): presumably prettyPicture writes "result.png" in this
# project -- confirm.
prettyPicture(clf, features_test, labels_test)

# Close the image file deterministically once its bytes have been read.
with open("result.png", "rb") as image_file:
    image_bytes = image_file.read()
output_image("result.png", "png", image_bytes)
Exemplo n.º 23
-1
def classify(features_train, labels_train):
    """Fit a Gaussian Naive Bayes classifier and return it.

    Args:
        features_train: Training feature vectors, passed to ``fit``.
        labels_train: Training labels, passed to ``fit``.

    Returns:
        The fitted ``sklearn.naive_bayes.GaussianNB`` instance.
    """
    # Imported here so the function is self-contained, as the exercise asks.
    from sklearn.naive_bayes import GaussianNB

    clf = GaussianNB()
    clf.fit(features_train, labels_train)
    return clf
import numpy as np
import pylab as pl


# Load the terrain data, already split into training and test portions.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# Separate the "fast" (label 0) and "slow" (label 1) training points so they
# can be given different colors in a scatterplot.
fast_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 0]
slow_idx = [ii for ii in range(0, len(features_train)) if labels_train[ii] == 1]

grade_fast = [features_train[ii][0] for ii in fast_idx]
bumpy_fast = [features_train[ii][1] for ii in fast_idx]
grade_slow = [features_train[ii][0] for ii in slow_idx]
bumpy_slow = [features_train[ii][1] for ii in slow_idx]


# Fit the classifier on the training portion.
clf = classify(features_train, labels_train)


# Draw the decision boundary with the test points overlaid.
prettyPicture(clf, features_test, labels_test)

# Close the image file deterministically once its bytes have been read.
with open("test.png", "rb") as image_file:
    image_bytes = image_file.read()
output_image("test.png", "png", image_bytes)