if __name__ == '__main__':
    # Script entry point: parse one ARFF data set, then run the decision-tree
    # evaluation and the ZeroR baseline evaluation on it.
    #
    # To load a previously pickled data set instead of parsing an ARFF file:
    #   pickledata = open('pickledata', 'rb')
    #   attrs = pickle.load(pickledata)
    #   data = pickle.load(pickledata)
    #
    # Alternative data sets that were used with this script:
    #   'tennis.arff', 'restaurant.arff', 'nursery.arff', 'lymphography.arff'
    dataset = 'breast-cancer.arff'

    # Use a context manager so the file handle is closed once parsing is done
    # (the bare open(...) passed straight to readArff was never closed).
    with open(dataset) as arff_file:
        attrs, data = readARFF.readArff(arff_file)

    # Single-argument parenthesised form prints identically on Python 2 and 3.
    print('##### %s dataset #####' % dataset)

    # To build and print a single tree from the whole data set:
    #   print len(data)
    #   attrslist = readARFF.getAttrList(attrs)
    #   root = makeTree(data, attrslist, attrs, readARFF.computeZeroR(data))
    #   root.printTree()

    # NOTE(review): the leading 5 is presumably the number of evaluation
    # rounds -- confirm against evaluation() / evaluation_zeroR().
    evaluation(5, attrs, data)
    evaluation_zeroR(5, data)
p = computePrecision(TPCount,FPCount,TNCount,FNCount) r = computeRecall(TPCount,FPCount,TNCount,FNCount) a = computeAccuracy(TPCount,FPCount,TNCount,FNCount) evalResult[c] = (p,r,a) drawChart(evalResult) return evalResult if __name__ == '__main__' : if len(sys.argv) < 2 : print "Usage: decisionTree.py #datasetName" sys.exit(-1) fname = sys.argv[-1] (attrs, data, classification) = readARFF.readArff(open(fname)) resultTest = {} resultTrain = {} resultZeroR = {} for time in range(5): print "Round ",time+1,":" index = range(len(data)) trainSample = random.sample(index,int(len(data)*0.8)) testSample = [i for i in index if i not in trainSample] trainDataset = [data[i] for i in trainSample] testDataset = [data[i] for i in testSample] print "\nUsing ZeroR:" rz = evalZeroR(trainDataset,testDataset,classification,attrs)
print ' training_precision:' print_pr_re(train_precision_average) print ' training_recall:' print_pr_re(train_recall_average) print ' training_accuracy: %f%%' % (train_accuracy_average * 100) if __name__ == '__main__': # for loading the existing data set pickle # pickledata = open('pickledata','rb') # attrs = pickle.load(pickledata) # data = pickle.load(pickledata) # for loading the new data set # dataset = 'tennis.arff' # dataset = 'restaurant.arff' dataset = 'breast-cancer.arff' # dataset = 'nursery.arff' # dataset = 'lymphography.arff' attrs, data = readARFF.readArff(open(dataset)) print '##### %s dataset #####' % dataset # # print len(data) # attrslist = readARFF.getAttrList(attrs) # root = makeTree(data, attrslist, attrs, readARFF.computeZeroR(data)) # root.printTree() evaluation(5, attrs, data) evaluation_zeroR(5, data)
if correctPredicted[item] == 0: Precision[item] = 0 Recall[item] = 0 else: Precision[item] = float(correctPredicted[item]) / float(timesPredicted[item]) Recall[item] = float(correctPredicted[item]) / float(exampleLabeled[item]) allCorrect += correctPredicted[item] accuracy = float(allCorrect) / len(testingData) print "For multi-class -----------" print "Precision: ", Precision print "Recall: ", Recall print "Accuracy: ", accuracy ### This part need be modified for different dataset def isPositive(result) : return "no-recurrence-events" in result if __name__ == '__main__' : filename = 'nursery.arff' attributes = readARFF.readArff(open(filename))[0] data = readARFF.readArff(open(filename))[1] alist = readARFF.getAttrList(attributes) trainingData, testingData = createTrainAndTestData(data) #runEvaluation5times(data, attributes, None) #runZeroREvaluation5times(data, attributes, None) calculateForMultiClass(trainingData, testingData, attributes, None)
else: node.children[value] = makeTree(subSet, aList, attributes, readARFF.computeZeroR(attributes, subSet)) return node def printNode(root): print root if len(root.children) != 0: for k in root.children: child = root.children[k] printNode(child) if __name__ == '__main__': fileName = sys.argv[-1] attributes, data = readARFF.readArff(open(fileName)) listAttributes = readARFF.getAttrList(attributes) times = 5 total = zero = 0 precision = recall = precisionZero = recallZero = 0 for i in range(times): trainData = random.sample(data, int(len(data) * 0.8)) defaultValue = readARFF.computeZeroR(attributes, data) zeroRValue = readARFF.computeZeroR(attributes, trainData) root = makeTree(trainData, listAttributes, attributes, defaultValue) #printNode(root) TP = tp = 0 testData = [] for d in data: if d not in trainData: testData.append(d)
if __name__ == '__main__': parser = argparse.ArgumentParser(description=""" Simply use to test: `python decisionTree.py ARFF_FILE` Wanzhang Sheng, Copyright 2013, GPL """) parser.add_argument('arff_file', help='The source ARFF file.') parser.add_argument('--verbose', dest='verbose', action='store_true', help="Be verbose to debug.") args = parser.parse_args() VERBOSE = args.verbose (attrs, data, classify_attr) = readARFF.readArff(open(args.arff_file)) domain = classify_attr.values()[0] random.seed() print ('=' + args.arff_file).ljust(25,'=') total = init_statistics(domain) total_noise = 0 for time in range(0,5): print " \033[93m-%d time-\033[0m" % (time+1) random.shuffle(data) sp = int(len(data)*4/5) train_data = data[:sp] test_data = data[sp+1:] root = makeTree(train_data, attrs) # noise