def __init__(self):
    """Build the fixtures: two trained-and-reloaded results and a refiner.

    For each of the two data files the instances are read, a
    ``DecisionTree`` using ``Id3`` is trained on them, the training result
    is dumped to the matching ``.path`` file and then loaded back with
    ``DecisionTreeResult.load``. A ``DecisionTreeRefiner`` is stored last.
    """

    def reload_trained(instances, path_file):
        # Dump and load again so the stored attribute is the object that
        # came back from ``path_file`` rather than the in-memory result.
        trained = DecisionTree(instances, Id3()).train()
        trained.dump(path_file)
        return DecisionTreeResult.load(path_file)

    self.instances1 = Instance.read('./data/test1.dat')
    self.path1 = reload_trained(self.instances1, './data/test1.dat.path')

    self.instances2 = Instance.read('./data/test2.dat')
    self.path2 = reload_trained(self.instances2, './data/test2.dat.path')

    self.dtr = DecisionTreeRefiner()
def training(train_filepath):
    """Train a decision tree with ``Id3`` on the instances in a file.

    :param train_filepath: path handed to ``Instance.read``.
    :return: whatever ``DecisionTree.train()`` returns for that data.
    """
    instances = Instance.read(train_filepath)
    tree = DecisionTree(instances, Id3())
    return tree.train()
def validation(valid_filepath, path):
    """Refine ``path`` against the instances read from a validation file.

    :param valid_filepath: path handed to ``Instance.read``.
    :param path: training result passed through to the refiner unchanged.
    :return: the value returned by ``DecisionTreeRefiner.refine``.
    """
    held_out = Instance.read(valid_filepath)
    return DecisionTreeRefiner().refine(path, held_out)
def __init__(self):
    """Load both data files and build two hand-written path results.

    ``gt1`` and ``gt2`` are ``DecisionTreeResult`` objects assembled from
    literal ``Path`` entries, each a list of ``(feature, bool)`` pairs plus
    an integer label.
    NOTE(review): "gt" presumably means ground truth for test1/test2 --
    confirm against the tests that use these attributes.
    """
    self.instances1 = Instance.read('./data/test1.dat')
    self.instances2 = Instance.read('./data/test2.dat')

    def as_result(rows):
        # One Path per (conditions, label) row, in the order given.
        return DecisionTreeResult(
            [Path(conditions, label) for conditions, label in rows])

    self.gt1 = as_result([
        ([('a', True), ('b', True), ('c', True)], 0),
        ([('a', True), ('b', True), ('c', False)], 1),
        ([('a', True), ('b', False)], 0),
        ([('a', False), ('c', True)], 1),
        ([('a', False), ('c', False), ('q', True)], 1),
        ([('a', False), ('c', False), ('q', False), ('e', True)], 1),
        ([('a', False), ('c', False), ('q', False), ('e', False)], 0),
    ])

    # Written out in full (not derived from gt1) so the two results never
    # share condition lists.
    self.gt2 = as_result([
        ([('a', True), ('b', True), ('c', True)], 0),
        ([('a', True), ('b', True), ('c', False)], 1),
        ([('a', True), ('b', False)], 0),
        ([('a', False), ('c', True)], 1),
        ([('a', False), ('c', False), ('e', True)], 1),
        ([('a', False), ('c', False), ('e', False)], 0),
    ])
from data import Instance
from algo import Id3, Gini
from fmeasure import Fmeasure
import time
import optparse
import os

# Command line: one positional data file, plus -a to choose the algorithm
# object handed to DecisionTree (defaults to 'id3').
parser = optparse.OptionParser(usage="usage: %prog [options] filepath")
parser.add_option(
    "-a",
    type="choice",
    choices=['id3', 'gini', 'f'],
    dest="algo",
    help="algorithm for decision tree",
    default="id3")
options, args = parser.parse_args()
if not args:
    parser.error("needs filepath")

# NOTE(review): start_time is not read in the visible part of the script;
# presumably an elapsed-time report follows -- confirm.
start_time = time.clock()
filepath = args[0]
instances = Instance.read(filepath)

# Label 1 is counted as positive; every other label counts as negative.
num_pos = len([d for d in instances if d.label == 1])
num_neg = len([d for d in instances if d.label != 1])
print('%s used (#pos: %s, #neg: %s)' % (filepath, num_pos, num_neg))

if options.algo == 'id3':
    algo = Id3()
elif options.algo == 'gini':
    algo = Gini()
elif options.algo == 'f':
    # Fmeasure is constructed with the number of label-1 instances.
    relnum = len([d for d in instances if d.label == 1])
    algo = Fmeasure(relnum)

# NOTE(review): DecisionTree is not imported in the visible lines; it is
# presumably defined or imported elsewhere in this file -- confirm.
dt = DecisionTree(instances, algo)
dpath = dt.train()
print("Paths generated:")