def classifyNB():
    """Build a classifier with ``NB_classify`` and emit its decision-boundary image.

    Reads the module-level ``features_train``, ``labels_train``,
    ``features_test`` and ``labels_test``. Returns nothing.
    """
    clf = NB_classify(features_train, labels_train, features_test, labels_test)

    # Draw the decision boundary with the test points overlaid.
    # NOTE(review): assumes prettyPicture writes "test.png" -- confirm.
    prettyPicture(clf, features_test, labels_test)

    # Read the image back inside a context manager so the file handle is
    # closed deterministically instead of being left to the garbage collector.
    with open("test.png", "rb") as image_file:
        output_image("test.png", "png", image_file.read())
def submitClassify():
    """Fit a classifier with ``classify`` and, when possible, emit its picture.

    Reads the module-level ``features_train`` and ``labels_train`` (and, for
    the picture, ``features_test`` and ``labels_test``). Returns nothing.
    """
    clf = classify(features_train, labels_train)

    # Draw the decision boundary with the test points overlaid.
    try:
        prettyPicture(clf, features_test, labels_test)
        # Context manager guarantees the image file is closed after reading.
        with open("test.png", "rb") as image_file:
            output_image("test.png", "png", image_file.read())
    except NameError:
        # Deliberate best-effort: when a name used above is not defined in
        # this environment the picture is skipped rather than failing.
        pass
def intro():
    """Fit a linear regression on a small hard-coded data set and plot it.

    Reports on the fitted model through ``getInfo``, draws the training
    points and the fit through ``plot_graph``, then hands the resulting
    "test.png" to ``output_image``. Returns nothing.
    """
    X_train = np.array([[1], [1.5], [4], [6]])
    y_train = np.array([1.5, 2, 3, 5])
    X_test = [[0], [2], [3]]
    y_test = [0, 2, 2.5]

    clf = linear_model.LinearRegression()
    clf.fit(X_train, y_train)
    getInfo(clf, X_train, y_train, X_test, y_test)

    plt.scatter(X_train, y_train)
    # NOTE(review): assumes plot_graph saves the figure as "test.png" -- confirm.
    plot_graph(clf, X_train, y_train, X_test, y_test, "X", "y")

    # Close the image file deterministically after reading it.
    with open("test.png", "rb") as image_file:
        output_image("test.png", "png", image_file.read())
def main():
    """Interactively train a classifier on the terrain data and display it.

    Prompts for a kernel name and, for any kernel other than ``'linear'``,
    for ``gamma`` and ``C``. Prints the test-set accuracy, emits the
    decision-boundary image and opens it with the external ``display``
    command.

    Raises:
        ValueError: if the text typed for ``gamma`` (other than the keywords
            ``'auto'``/``'scale'``) or for ``C`` is not a number.
    """
    features_train, labels_train, features_test, labels_test = makeTerrainData()

    gamma, c = 'auto', 1.0
    kernel = raw_input('Select the kernel: ')
    if kernel != 'linear':
        # raw_input returns text; the hyper-parameters must be numeric before
        # they reach the classifier, otherwise they are passed on as strings.
        gamma_text = raw_input('Gamma: ').strip()
        # Keep the keyword forms untouched; everything else must be a float.
        gamma = gamma_text if gamma_text in ('auto', 'scale') else float(gamma_text)
        c = float(raw_input('C: '))

    clf = classify(features_train, labels_train, kernel, c, gamma)

    print('Python SVM Example')
    accuracy = clf.score(features_test, labels_test)
    print('Accuracy score: {}'.format(accuracy))

    # Draw the decision boundary with the test points overlaid.
    # NOTE(review): assumes prettyPicture writes "naive_bayes.png" -- confirm.
    prettyPicture(clf, features_test, labels_test)

    # Close the image file deterministically after reading it.
    with open("naive_bayes.png", "rb") as image_file:
        output_image("naive_bayes.png", "png", image_file.read())

    # Launch the viewer in the background so the script does not block.
    os.system('display naive_bayes.png &')
from ClassifyDT import classify, DTAccuracy
import numpy as np
import pylab as pl

# Load the train/test split of the terrain data.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# The training points mix label 0 ("fast") and label 1 ("slow"); pull out
# each feature column per label so the groups can be plotted in different
# colours.
grade_fast = [row[0] for idx, row in enumerate(features_train) if labels_train[idx] == 0]
bumpy_fast = [row[1] for idx, row in enumerate(features_train) if labels_train[idx] == 0]
grade_slow = [row[0] for idx, row in enumerate(features_train) if labels_train[idx] == 1]
bumpy_slow = [row[1] for idx, row in enumerate(features_train) if labels_train[idx] == 1]

# Fit the classifier, then report the accuracy computed by DTAccuracy.
clf = classify(features_train, labels_train)
accuracy = DTAccuracy(features_train, labels_train, features_test, labels_test)
print(accuracy)

# Draw the decision boundary with the test points overlaid.
prettyPicture(clf, features_test, labels_test)
output_image("D:/machine_learning/git/decision_tree/test.png")
""" from prep_terrain_data import makeTerrainData from class_vis import prettyPicture, output_image from ClassifyNB import classify import numpy as np import pylab as pl features_train, labels_train, features_test, labels_test = makeTerrainData() ### the training data (features_train, labels_train) have both fast and slow points mixed ### in together--separate them so we can give them different colors in the scatterplot, ### and visually identify them grade_fast = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==0] bumpy_fast = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==0] grade_slow = [features_train[ii][0] for ii in range(0, len(features_train)) if labels_train[ii]==1] bumpy_slow = [features_train[ii][1] for ii in range(0, len(features_train)) if labels_train[ii]==1] # You will need to complete this function imported from the ClassifyNB script. # Be sure to change to that code tab to complete this quiz. clf = classify(features_train, labels_train) ### draw the decision boundary with the text points overlaid prettyPicture(clf, features_test, labels_test) output_image('test.png', 'png', open('test.png', 'rb').read())
from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture, output_image
from ClassifyNB import classify

import numpy as np
import pylab as pl

# Load the train/test split of the terrain data.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# Split each feature column by label (0 = "fast", 1 = "slow") so the two
# groups can be plotted separately.
grade_fast = [pt[0] for pt, label in zip(features_train, labels_train) if label == 0]
bumpy_fast = [pt[1] for pt, label in zip(features_train, labels_train) if label == 0]
grade_slow = [pt[0] for pt, label in zip(features_train, labels_train) if label == 1]
bumpy_slow = [pt[1] for pt, label in zip(features_train, labels_train) if label == 1]

clf = classify(features_train, labels_train)

# Draw the decision boundary with the test points overlaid.
prettyPicture(clf, features_test, labels_test)

# Pass the image *contents*, read in binary mode, rather than an open
# text-mode file object; the handle is closed as soon as the bytes are read.
# NOTE(review): assumes output_image's third argument is the raw image
# bytes -- confirm against class_vis.
with open("test.png", "rb") as image_file:
    output_image("test.png", "png", image_file.read())
#!/usr/bin/python
from terrain_data import makeTerrainData
from class_vis import prettyPicture, output_image
from ClassifyNB import classify

import numpy as np
import pylab as pl

# Load the train/test split of the terrain data.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# The training data have both "fast" (label 0) and "slow" (label 1) points
# mixed together -- separate them so they can be given different colours in
# a scatter plot and identified visually.
grade_fast = [pt[0] for pt, label in zip(features_train, labels_train) if label == 0]
bumpy_fast = [pt[1] for pt, label in zip(features_train, labels_train) if label == 0]
grade_slow = [pt[0] for pt, label in zip(features_train, labels_train) if label == 1]
bumpy_slow = [pt[1] for pt, label in zip(features_train, labels_train) if label == 1]

clf = classify(features_train, labels_train)

# Draw the decision boundary with the test points overlaid.
# NOTE(review): assumes prettyPicture writes "GaussianDecisionBoundary.png"
# -- confirm.
prettyPicture(clf, features_test, labels_test)

# Read the image inside a context manager so the handle is always closed.
with open("GaussianDecisionBoundary.png", "rb") as image_file:
    output_image("GaussianDecisionBoundary", "png", image_file.read())
# Load the train/test split of the terrain data.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# Training points of label 0 ("fast") and label 1 ("slow") are interleaved;
# collect each feature column per label so the groups can be coloured
# differently in a scatter plot.
grade_fast = [row[0] for idx, row in enumerate(features_train) if labels_train[idx] == 0]
bumpy_fast = [row[1] for idx, row in enumerate(features_train) if labels_train[idx] == 0]
grade_slow = [row[0] for idx, row in enumerate(features_train) if labels_train[idx] == 1]
bumpy_slow = [row[1] for idx, row in enumerate(features_train) if labels_train[idx] == 1]

# Fit the classifier on the training split.
clf = classify(features_train, labels_train)

# Draw the decision boundary with the test points overlaid.
prettyPicture(clf, features_test, labels_test)
output_image("test.png")
#################################################################################
### your code here!  name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary

# Number of neighbours that vote on each prediction.
n_neighbors = 15

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Loading %iNN library" % n_neighbors)
from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(n_neighbors)

print("Training algorithm")
clf.fit(features_train, labels_train)

print("Predicting results")
pred = clf.predict(features_test)

print("Computing algorithm accuracy")
from sklearn.metrics import accuracy_score
acc = accuracy_score(pred, labels_test)
print("Accuracy: %.4f" % acc)

# Accuracies recorded from earlier runs:
#   3NN: 93.6%
#   4NN: 94.0%  (even k is normally avoided because votes can tie; it is
#                not yet understood why it scored best here)
#   5NN: 92.0%
#   7NN: 93.6%

outputfile = "test_%iNN.png" % n_neighbors
print("Saving output plot as %s" % outputfile)
prettyPicture(clf, features_test, labels_test, outputfile)

# Read the plot back inside a context manager so the handle is closed.
with open(outputfile, "rb") as image_file:
    output_image(outputfile, "png", image_file.read())
# Train one decision tree per candidate min_samples_split value, record its
# test accuracy in acc_samples and emit its decision-boundary picture.
for sample in min_samples:
    clf = tree.DecisionTreeClassifier(min_samples_split=sample)
    clf = clf.fit(features_train, labels_train)

    pred = clf.predict(features_test)
    accuracy = accuracy_score(pred, labels_test)
    acc_samples[f'acc_min_samples_split_{sample}'] = accuracy
    print(f'Accuracy for min_samples_split = {sample}: {accuracy}')

    # NOTE(review): assumes prettyPicture appends ".png" to pic_name, since
    # the file read back below is named test_<sample>.png -- confirm.
    prettyPicture(clf, features_test, labels_test, pic_name=f'test_{sample}')

    # Close each image file as soon as it has been read; the loop would
    # otherwise leave one open handle per iteration to the garbage collector.
    with open(f"test_{sample}.png", "rb") as image_file:
        output_image(f"test_{sample}.png", "png", image_file.read())
    print('\n')


def submit_accuracies():
    """Return the module-level ``acc_samples`` mapping filled by the loop above."""
    return acc_samples


if __name__ == "__main__":
    pp(submit_accuracies())
def classifyDT(features_train, labels_train, features_test, labels_test):
    """Build a classifier with ``DT_classify`` and emit its decision-boundary image.

    Args:
        features_train: training feature rows, forwarded to ``DT_classify``.
        labels_train: training labels, forwarded to ``DT_classify``.
        features_test: test feature rows, forwarded to ``DT_classify`` and
            ``prettyPicture``.
        labels_test: test labels, forwarded to ``DT_classify`` and
            ``prettyPicture``.

    Returns nothing; the image is published under the name "tree.png".
    """
    clf = DT_classify(features_train, labels_train, features_test, labels_test)

    # NOTE(review): assumes prettyPicture writes "test.png" -- confirm.
    prettyPicture(clf, features_test, labels_test)

    # The picture is read from "test.png" but published as "tree.png".
    # The context manager closes the handle once the bytes are read.
    with open("test.png", "rb") as image_file:
        output_image("tree.png", "png", image_file.read())
# Load the train/test split of the terrain data.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# The training data have both "fast" (label 0) and "slow" (label 1) points
# mixed together -- separate them so they can be given different colours in
# a scatter plot and identified visually.
grade_fast = [pt[0] for pt, label in zip(features_train, labels_train) if label == 0]
bumpy_fast = [pt[1] for pt, label in zip(features_train, labels_train) if label == 0]
grade_slow = [pt[0] for pt, label in zip(features_train, labels_train) if label == 1]
bumpy_slow = [pt[1] for pt, label in zip(features_train, labels_train) if label == 1]

# Fit the classifier on the training split.
clf = classify(features_train, labels_train)

# Draw the decision boundary with the test points overlaid.
# NOTE(review): assumes prettyPicture writes "test.png" -- confirm.
prettyPicture(clf, features_test, labels_test)

# Read the image inside a context manager so the handle is always closed.
with open("test.png", "rb") as image_file:
    output_image("test.png", "png", image_file.read())
color="r", label="slow", marker=marker, alpha=alpha) plot_xy(features_train, labels_train, marker='x', alpha=0.5) plot_xy(features_test, labels_test, marker='o', alpha=1) plt.legend() plt.xlabel("bumpiness") plt.ylabel("grade") plt.title("training data") plt.show() plt.savefig("initial.png") output_image('initial.png') ################################################################################ print(f'samples train = {len(features_train)}, test = {len(features_test)}') print( f'fast % train ={sum(labels_train)*100.0/len(labels_train)}, test ={sum(labels_test)*100.0/len(labels_test)}' ) # your code here! name your classifier object clf if you want the # visualization code (prettyPicture) to show you the decision boundary classifiers = [ ( DecisionTreeClassifier(min_samples_leaf=8, random_state=0), {
# The training data have both "fast" (label 0) and "slow" (label 1) points
# mixed together -- separate them so they can be given different colours in
# a scatter plot and identified visually.
grade_fast = [pt[0] for pt, label in zip(features_train, labels_train) if label == 0]
bumpy_fast = [pt[1] for pt, label in zip(features_train, labels_train) if label == 0]
grade_slow = [pt[0] for pt, label in zip(features_train, labels_train) if label == 1]
bumpy_slow = [pt[1] for pt, label in zip(features_train, labels_train) if label == 1]

clf = classify(features_train, labels_train)

# Draw the decision boundary with the test points overlaid.
# NOTE(review): assumes prettyPicture writes "test.png" -- confirm.
prettyPicture(clf, features_test, labels_test)

# Read the image inside a context manager so the handle is always closed.
with open("test.png", "rb") as image_file:
    output_image("test.png", "png", image_file.read())

# Accuracy, computed three ways.
# Method 1: fraction of test labels that equal the prediction.
pred = clf.predict(features_test)
accuracy = sum(labels_test == pred) / float(len(labels_test))
print("Accuracy is: ", accuracy)

# Method 2: sklearn's accuracy_score helper.
from sklearn.metrics import accuracy_score
print("Accuracy is: ", accuracy_score(pred, labels_test))

# Method 3: the classifier's own score() method.
print("Accuracy is: ", clf.score(features_test, labels_test))
# The training data have both "fast" (label 0) and "slow" (label 1) points
# mixed together -- separate them so they can be given different colours in
# a scatter plot and identified visually.
grade_fast = [pt[0] for pt, label in zip(features_train, labels_train) if label == 0]
bumpy_fast = [pt[1] for pt, label in zip(features_train, labels_train) if label == 0]
grade_slow = [pt[0] for pt, label in zip(features_train, labels_train) if label == 1]
bumpy_slow = [pt[1] for pt, label in zip(features_train, labels_train) if label == 1]

# NBAccuracy returns both the classifier and its accuracy.
clf, accuracy = NBAccuracy(features_train, labels_train, features_test, labels_test)

# Draw the decision boundary with the test points overlaid.
# NOTE(review): assumes prettyPicture writes "test.png" -- confirm.
prettyPicture(clf, features_test, labels_test)

# The file name, format and open mode must be string literals; as bare
# names (test.png, png, rb) they raise NameError.  The context manager
# closes the handle once the bytes are read.
with open("test.png", "rb") as image_file:
    output_image("test.png", "png", image_file.read())

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print(accuracy)
# Load the train/test split of the terrain data.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# --- First run: classify() called without a third argument ----------------
# The classify() function in classifyDT is where the magic happens.
clf = classify(features_train, labels_train)

# Predict.
pred = clf.predict(features_test)

# Compute accuracy.  Single-argument print(...) with %s formatting prints
# the same text on Python 2 and Python 3.
acc = clf.score(features_test, labels_test)
print("accuracy for min_sample_split=2: %s" % acc)

# Build the scatter plot, save it to the named file and publish it.  The
# context manager closes the handle once the bytes are read.
prettyPicture(clf, features_test, labels_test, "test_min_sample_split2.png")
with open("test_min_sample_split2.png", "rb") as image_file:
    output_image("test_min_sample_split2.png", "png", image_file.read())

# --- Second run: classify() called with 50 as its third argument ----------
clf = classify(features_train, labels_train, 50)

# Predict.
pred = clf.predict(features_test)

# Compute accuracy.
acc = clf.score(features_test, labels_test)
print("accuracy for min_sample_split=50: %s" % acc)

# Build the scatter plot and publish it.
# NOTE(review): assumes prettyPicture writes "test.png" when no file name
# is given -- confirm.
prettyPicture(clf, features_test, labels_test)
with open("test.png", "rb") as image_file:
    output_image("test.png", "png", image_file.read())
#plt.show()
#################################################################################
### your code here!  name your classifier object clf if you want the
### visualization code (prettyPicture) to show you the decision boundary

# Number of neighbours that vote on each prediction.
n_neighbors = 15

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print("Loading %iNN library" % n_neighbors)
from sklearn.neighbors import KNeighborsClassifier
clf = KNeighborsClassifier(n_neighbors)

print("Training algorithm")
clf.fit(features_train, labels_train)

print("Predicting results")
pred = clf.predict(features_test)

print("Computing algorithm accuracy")
from sklearn.metrics import accuracy_score
acc = accuracy_score(pred, labels_test)
print("Accuracy: %.4f" % acc)

# Accuracies recorded from earlier runs:
#   3NN: 93.6%
#   4NN: 94.0%  (even k is normally avoided because votes can tie; it is
#                not yet understood why it scored best here)
#   5NN: 92.0%
#   7NN: 93.6%

outputfile = "test_%iNN.png" % n_neighbors
print("Saving output plot as %s" % outputfile)
prettyPicture(clf, features_test, labels_test, outputfile)

# Read the plot back inside a context manager so the handle is closed.
with open(outputfile, "rb") as image_file:
    output_image(outputfile, "png", image_file.read())
from sklearn.naive_bayes import GaussianNB
from sklearn import metrics

from prep_terrain_data import makeTerrainData
from class_vis import prettyPicture, output_image

# Load the train/test split of the terrain data.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# The training data have both "fast" (label 0) and "slow" (label 1) points
# mixed together -- separate them so they can be given different colours in
# a scatter plot and identified visually.
grade_fast = [pt[0] for pt, label in zip(features_train, labels_train) if label == 0]
bumpy_fast = [pt[1] for pt, label in zip(features_train, labels_train) if label == 0]
grade_slow = [pt[0] for pt, label in zip(features_train, labels_train) if label == 1]
bumpy_slow = [pt[1] for pt, label in zip(features_train, labels_train) if label == 1]

# Fit Gaussian Naive Bayes on the training split and score it on the test split.
NBclassifier = GaussianNB()
NBclassifier.fit(features_train, labels_train)
NBpreditiction = NBclassifier.predict(features_test)
print(metrics.accuracy_score(labels_test, NBpreditiction))
# Alternative: NBclassifier.score(features_test, labels_test)

# Draw the decision boundary with the test points overlaid.
prettyPicture(NBclassifier, features_test, labels_test, "naive_bayes/naive_bayes.png")

# Read the image inside a context manager so the handle is always closed.
with open("naive_bayes/naive_bayes.png", "rb") as image_file:
    output_image("naive_bayes/naive_bayes.png", "png", image_file.read())
min_impurity_split=None, class_weight=None, presort='deprecated', ccp_alpha=0.0) clf.fit(features_train, labels_train, sample_weight=None, check_input=True, X_idx_sorted=None) return clf clf = classify(features_train, labels_train) # store your predictions in a list named pred # pred = clf.predict(features_test) # acc = accuracy_score(pred, labels_test) acc = clf.score(features_test, labels_test) def submitAccuracy(): return acc print(acc) #### grader code, do not modify below this line prettyPicture(clf, features_test, labels_test) output_image('test.png')
import sys

from svm_classifier import classify

# Extend the import path before the imports below.
# NOTE(review): presumably prep_terrain_data and class_vis live in
# ../tools/ -- confirm.
sys.path.append("../tools/")
from prep_terrain_data import make_terrain_data
from class_vis import pretty_picture, output_image

# Load the train/test split of the terrain data.
features_train, labels_train, features_test, labels_test = make_terrain_data()

# classify returns both the classifier and its accuracy.
clf, accuracy = classify(features_train, labels_train, features_test, labels_test)

# Draw the decision boundary with the test points overlaid.
pretty_picture(clf, features_test, labels_test, "svm_speed.png")

# Read the image inside a context manager so the handle is always closed.
with open("svm_speed.png", "rb") as image_file:
    output_image("svm_speed.png", "png", image_file.read())
# Load the train/test split of the terrain data.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# The training data have both "fast" (label 0) and "slow" (label 1) points
# mixed together -- separate them so they can be given different colours in
# a scatter plot and identified visually.
grade_fast = [pt[0] for pt, label in zip(features_train, labels_train) if label == 0]
bumpy_fast = [pt[1] for pt, label in zip(features_train, labels_train) if label == 0]
grade_slow = [pt[0] for pt, label in zip(features_train, labels_train) if label == 1]
bumpy_slow = [pt[1] for pt, label in zip(features_train, labels_train) if label == 1]

# Fit the classifier on the training split.
clf = classify(features_train, labels_train)

# Draw the decision boundary with the test points overlaid.
# NOTE(review): assumes prettyPicture writes "result.png" -- confirm.
prettyPicture(clf, features_test, labels_test)

# Read the image inside a context manager so the handle is always closed.
with open("result.png", "rb") as image_file:
    output_image("result.png", "png", image_file.read())
def classify(features_train, labels_train):
    """Fit a Gaussian Naive Bayes classifier and return it.

    Args:
        features_train: training feature rows, passed to ``GaussianNB.fit``.
        labels_train: training labels, passed to ``GaussianNB.fit``.

    Returns:
        The fitted ``GaussianNB`` instance.
    """
    # Import the sklearn module for GaussianNB (kept local so the function
    # is self-contained).
    from sklearn.naive_bayes import GaussianNB

    # Create the classifier, fit it on the training features and labels,
    # and return the fitted classifier.
    clf = GaussianNB()
    clf.fit(features_train, labels_train)
    return clf


import numpy as np
import pylab as pl

# Load the train/test split of the terrain data.
features_train, labels_train, features_test, labels_test = makeTerrainData()

# The training data have both "fast" (label 0) and "slow" (label 1) points
# mixed together -- separate them so they can be given different colours in
# a scatter plot and identified visually.
grade_fast = [pt[0] for pt, label in zip(features_train, labels_train) if label == 0]
bumpy_fast = [pt[1] for pt, label in zip(features_train, labels_train) if label == 0]
grade_slow = [pt[0] for pt, label in zip(features_train, labels_train) if label == 1]
bumpy_slow = [pt[1] for pt, label in zip(features_train, labels_train) if label == 1]

clf = classify(features_train, labels_train)

# Draw the decision boundary with the test points overlaid.
# NOTE(review): assumes prettyPicture writes "test.png" -- confirm.
prettyPicture(clf, features_test, labels_test)

# Read the image inside a context manager so the handle is always closed.
with open("test.png", "rb") as image_file:
    output_image("test.png", "png", image_file.read())