def Pruning_the_Tree():
    """Demonstrate pruning one tree at two minimum-gain thresholds.

    Reloads ``treepredict``, builds a tree from ``treepredict.my_data`` and
    then calls ``treepredict.prune`` on that same tree twice (0.1, then 1.0),
    printing the tree after each call. ``prune`` is invoked only for its
    effect on ``tree``; its return value is not used.

    Expects ``treepredict`` to be imported at module level already.
    """
    # `reload` is a builtin only on Python 2; Python 3.4+ provides it in
    # importlib. Resolve whichever one exists so the demo runs on both.
    try:
        from importlib import reload as reload_module
    except ImportError:
        reload_module = reload  # noqa: F821 - Python 2 builtin

    # Single-argument print(...) produces identical output on Python 2 and 3.
    print('>>Pruning the Tree')
    reload_module(treepredict)
    tree = treepredict.buildtree(treepredict.my_data)
    print('------------------')

    # The second pass operates on the tree as left by the first pass.
    for min_gain in (0.1, 1.0):
        treepredict.prune(tree, min_gain)
        treepredict.printtree(tree)
def main(rows):
    """Build a decision tree from ``rows``, print it, and draw it.

    ``rows`` is passed unchanged to ``treepredict.buildtree`` (the original
    note describes them as fruits with their colors and size). The tree is
    printed with ``printtree`` and handed to ``drawtree`` with
    ``jpeg='treeview.jpg'``.
    """
    decision_tree = treepredict.buildtree(rows)

    # Ad-hoc classification checks, left disabled:
    # print(treepredict.classify([2, 'red'], decision_tree))
    # print(treepredict.classify([5, 'red'], decision_tree))
    # print(treepredict.classify([1, 'green'], decision_tree))

    # Decision tree: text dump first, then the image.
    treepredict.printtree(decision_tree)
    treepredict.drawtree(decision_tree, jpeg='treeview.jpg')
import treepredict import preprocessor import postprocessor import arff import copy label_count = 6 train_data_file = '.\\scene\\scene-train-tiny.arff' test_data_file = '.\\scene\\scene-test-tiny.arff' method = input('1 单标签;2 多个二类分类') if method == '1': #读取训练集,建树(多标签转换成单标签) (attributes_list, label_value_list,train_data) = preprocessor.read_data(train_data_file, label_count, arff.DENSE) train_data = preprocessor.translate_label_multiclass(train_data, label_count) tree = treepredict.buildtree(train_data, attributes_list, label_value_list) treepredict.printtree(tree) #读取测试集,验证效果 (test_attributes_list, test_label_value_list, test_data) = preprocessor.read_data(test_data_file, label_count, arff.DENSE) test_data_copy = copy.deepcopy(test_data) predicted_labels_list = [] for row in test_data: result = treepredict.classify(row, tree, test_attributes_list) post_result = treepredict.post_classify(result) decoded_result = preprocessor.label_decoding(post_result) predicted_labels_list.append(decoded_result) hamming_loss = postprocessor.hamming_loss(test_data_copy, predicted_labels_list) print('hamming loss of merging labels:', hamming_loss) else : #当做多个二类分类问题处理
import treepredict

# Toy training set: fruits with their colors and size.
fruits = [
    [4, 'red', 'apple'],
    [4, 'green', 'apple'],
    [1, 'red', 'cherry'],
    [1, 'green', 'grape'],
    [5, 'red', 'apple'],
]
tree = treepredict.buildtree(fruits)

# Run three sample observations through the tree, in the original order.
# The results are not stored or printed.
for _observation in ([2, 'red'], [5, 'red'], [1, 'green']):
    treepredict.classify(_observation, tree)

treepredict.printtree(tree)
# Drawing the tree to an image is left disabled:
# treepredict.drawtree(tree, jpeg='treeview.jpg')
import treepredict

# Fruits with their colors and size, one training row per line.
fruits = [
    [4, "red", "apple"],
    [4, "green", "apple"],
    [1, "red", "cherry"],
    [1, "green", "grape"],
    [5, "red", "apple"],
]

# Fit the tree on the rows above.
tree = treepredict.buildtree(fruits)

# Classify three observations; the return values are discarded.
treepredict.classify([2, "red"], tree)
treepredict.classify([5, "red"], tree)
treepredict.classify([1, "green"], tree)

# Text dump of the fitted tree. The image export below stays switched off.
treepredict.printtree(tree)
# treepredict.drawtree(tree, jpeg="treeview.jpg")
import treepredict as tr

# Build a tree from the sample data bundled with treepredict and print it.
tree = tr.buildtree(tr.my_data)
tr.printtree(tree)

# Classify two observations in which some fields are None, using mdclassify.
# print(...) with a single argument emits the same text on Python 2 and
# Python 3, whereas the bare `print x` statement is a SyntaxError on Python 3.
print(tr.mdclassify(['google', None, 'yes', None], tree))
print(tr.mdclassify(['google', 'France', None, None], tree))
def Recursive_Tree_Building():
    """Demonstrate tree construction on the bundled sample data.

    Reloads ``treepredict``, builds a tree from ``treepredict.my_data`` and
    prints it with ``treepredict.printtree``.

    Expects ``treepredict`` to be imported at module level already.
    """
    # `reload` is a builtin only on Python 2; Python 3.4+ provides it in
    # importlib. Resolve whichever one exists so the demo runs on both.
    try:
        from importlib import reload as reload_module
    except ImportError:
        reload_module = reload  # noqa: F821 - Python 2 builtin

    # Single-argument print(...) produces identical output on Python 2 and 3.
    print('>>Recursive Tree Building')
    reload_module(treepredict)
    tree = treepredict.buildtree(treepredict.my_data)
    treepredict.printtree(tree)
# Candidate gain increments to evaluate: 0, then 10**-1 down to 10**-9.
increments = [0] + [10 ** -exponent for exponent in range(1, 10)]
print('Increments to be tested and passed to gain_increments', increments)

# testing_gain_increments is defined elsewhere in this file; its result is
# used here as a mapping from increment to a comparable score
# (presumably the classification rate - confirm against its definition).
accuracyTest = testing_gain_increments(increments)
# print(accuracyTest)

# Order increments by ascending score so the best one is last.
# sorted(key=...) replaces list.sort(cmp=...), which Python 3 removed; both
# sorts are stable, so ties resolve to the same element.
values = sorted(accuracyTest, key=accuracyTest.get)
print('Increment value with best classification rate was ', values[-1])

# Let's see what it looks like...
# print("\nFinal tree...\n")
# Build the tree for the winning increment once and keep a reference to it,
# so the tree that is drawn is the same tree that is printed.
best_tree = treepredict.buildtree(
    train_data,
    gain_increment=values[-1],
    gain_threshold=0,
    instance_minimum=1,
)
treepredict.printtree(best_tree)

# Produce a JPEG image of the tree.
treepredict.drawtree(best_tree, jpeg="sample_tree.jpg")
# print("\nimage of tree generated using PIL (Python Imaging Library) modules.\n")

# Let's classify an incoming record of '(direct), USA, yes, 5' ...
# incoming = ['(direct)', 'USA', 'yes', 5]
# print("Prediction of new record: ", treepredict.classify(incoming, best_tree))

# Finally, what does pruning do with say a mingain = 0.9 ?
# print("\nPruned tree...\n")
# treepredict.prune(best_tree, 0.9)
# treepredict.printtree(best_tree)

# For group homework, modify "buildtree" function so that it stops
# Append 10**-1 down to 10**-9 to the existing `increments` list, which must
# already be defined before this point.
increments.extend(10 ** -exponent for exponent in range(1, 10))
print('Increments to be tested and passed to gain_increments', increments)

# testing_gain_increments is defined elsewhere in this file; its result is
# used here as a mapping from increment to a comparable score
# (presumably the classification rate - confirm against its definition).
accuracyTest = testing_gain_increments(increments)
# print(accuracyTest)

# Order increments by ascending score so the best one is last.
# sorted(key=...) replaces list.sort(cmp=...), which Python 3 removed; both
# sorts are stable, so ties resolve to the same element.
values = sorted(accuracyTest, key=accuracyTest.get)
print('Increment value with best classification rate was ', values[-1])

# Let's see what it looks like...
# print("\nFinal tree...\n")
# Build the tree for the winning increment once and keep a reference to it,
# so the tree that is drawn is the same tree that is printed.
best_tree = treepredict.buildtree(
    train_data,
    gain_increment=values[-1],
    gain_threshold=0,
    instance_minimum=1,
)
treepredict.printtree(best_tree)

# Produce a JPEG image of the tree.
treepredict.drawtree(best_tree, jpeg="sample_tree.jpg")
# print("\nimage of tree generated using PIL (Python Imaging Library) modules.\n")

# Let's classify an incoming record of '(direct), USA, yes, 5' ...
# incoming = ['(direct)', 'USA', 'yes', 5]
# print("Prediction of new record: ", treepredict.classify(incoming, best_tree))

# Finally, what does pruning do with say a mingain = 0.9 ?
# print("\nPruned tree...\n")
# treepredict.prune(best_tree, 0.9)
# treepredict.printtree(best_tree)