Пример #1
0
# KNN analysis
import util
from sklearn.neighbors import KNeighborsClassifier

# Letter data set, evaluated with a k-nearest-neighbours classifier (k = 1).
# The loader's four return values are used below as: c = training labels,
# f = training features, t_C = test labels, t_f = test features.
c, f, t_C, t_f = util.load_letter_data_set()
k = 1
# fit() hands back the estimator itself, so construction and training chain.
classifier = KNeighborsClassifier(
    n_neighbors=k,
    weights='distance',
    algorithm='ball_tree',
).fit(f, c)

# Error rate = number of misclassified samples / number of samples.
training_predictions = classifier.predict(f)
training_misses = c[training_predictions != c]
training_error_rate = len(training_misses) / len(c)

predictions = classifier.predict(t_f)
test_misses = t_C[predictions != t_C]
error_rate = len(test_misses) / len(t_C)

print("letter training error rate {}".format(training_error_rate))
print("letter error rate {}".format(error_rate))

# Census (income) data set, evaluated with a k-nearest-neighbours
# classifier (k = 3).  The loader's return values are used below as:
# c = training labels, f = training features, t_C = test labels,
# t_f = test features.
c, f, t_C, t_f = util.load_census_data_set()
k = 3
# fit() hands back the estimator itself, so construction and training chain.
classifier = KNeighborsClassifier(
    n_neighbors=k,
    weights='distance',
    algorithm='ball_tree',
).fit(f, c)

# Error rate = number of misclassified samples / number of samples.
training_predictions = classifier.predict(f)
training_misses = c[training_predictions != c]
training_error_rate = len(training_misses) / len(c)

predictions = classifier.predict(t_f)
test_misses = t_C[predictions != t_C]
error_rate = len(test_misses) / len(t_C)

print("Income training error rate {}".format(training_error_rate))
print("Income error rate {}".format(error_rate))
import util
import string
import pydotplus
from sklearn import tree
# Census (income) data set, evaluated with a shallow decision tree.
# The loader's return values are used below as: cc = training labels,
# ff = training features, t_cc = test labels, t_ff = test features.
cc, ff, t_cc, t_ff = util.load_census_data_set()

clf = tree.DecisionTreeClassifier(max_depth=4, min_samples_leaf=5)
clf.fit(ff, cc)

# Error rate = number of misclassified samples / number of samples.
training_predictions = clf.predict(ff)
training_misses = cc[training_predictions != cc]
training_error_rate = len(training_misses) / len(cc)

predictions = clf.predict(t_ff)
test_misses = t_cc[predictions != t_cc]
test_error_rate = len(test_misses) / len(t_cc)

print("income training error rate {}".format(training_error_rate))
print("income test error rate {}".format(test_error_rate))

# Render the fitted tree to a PNG via Graphviz DOT source.
class_names = ['<=50K', '>50K']
# NOTE(review): presumably one name per feature column, in the loader's
# column order -- verify against util.load_census_data_set().
feature_names = [
    'age',
    'workclass',
    'education',
    'marital-status',
    'occupation',
    'relationship',
    'race',
    'sex',
    'weekly-work-hours',
    'native country',
]
# With out_file=None the DOT text comes back as the return value instead of
# being written to a file.
dot_data = tree.export_graphviz(
    clf,
    out_file=None,
    feature_names=feature_names,
    class_names=class_names,
)
graph = pydotplus.graph_from_dot_data(dot_data)
graph.write_png("census_d_tree.png")



# Letter data set, evaluated with a decision tree (depth capped at 26).
# The loader's return values are used below as: c = training labels,
# f = training features, t_C = test labels, t_f = test features.
c, f, t_C, t_f = util.load_letter_data_set()
# fit() hands back the estimator itself, so construction and training chain.
classifier = tree.DecisionTreeClassifier(
    max_depth=26,
    min_samples_leaf=5,
).fit(f, c)

# Error rate = number of misclassified samples / number of samples.
training_predictions = classifier.predict(f)
training_misses = c[training_predictions != c]
training_error_rate = len(training_misses) / len(c)

predictions = classifier.predict(t_f)
test_misses = t_C[predictions != t_C]
error_rate = len(test_misses) / len(t_C)

print("letter training error rate {}".format(training_error_rate))
print("letter error rate {}".format(error_rate))