def __init__(self):
        """Set up two trained decision-tree results and a refiner.

        For each of the two data files, the instances are read, a
        DecisionTree is trained on them with Id3, the training result is
        dumped to a ``.path`` file next to the data file, and that file is
        loaded back through DecisionTreeResult.load. The instances and the
        reloaded results are kept on ``self`` together with a fresh
        DecisionTreeRefiner.
        """
        def persist_and_reload(instances, path_file):
            # Train, write the result to disk, then read the same file
            # back so the stored attribute is the deserialized result.
            trained = DecisionTree(instances, Id3()).train()
            trained.dump(path_file)
            return DecisionTreeResult.load(path_file)

        self.instances1 = Instance.read('./data/test1.dat')
        self.path1 = persist_and_reload(self.instances1,
                                        './data/test1.dat.path')

        self.instances2 = Instance.read('./data/test2.dat')
        self.path2 = persist_and_reload(self.instances2,
                                        './data/test2.dat.path')

        self.dtr = DecisionTreeRefiner()
Exemplo n.º 2
0
def training(train_filepath):
    """Train a decision tree with Id3 on the instances in ``train_filepath``.

    The instances are loaded with Instance.read, and the value returned by
    DecisionTree.train() is handed back to the caller unchanged.
    """
    instances = Instance.read(train_filepath)
    return DecisionTree(instances, Id3()).train()
Exemplo n.º 3
0
def validation(valid_filepath, path):
    """Refine ``path`` against the instances read from ``valid_filepath``.

    Returns whatever DecisionTreeRefiner.refine(path, instances) produces.
    """
    # Read the data before building the refiner, as the refiner only
    # needs it at the refine() call.
    held_out = Instance.read(valid_filepath)
    refiner = DecisionTreeRefiner()
    return refiner.refine(path, held_out)
Exemplo n.º 4
0
 def __init__(self):
     """Load both data files and build two hand-written tree results.

     ``gt1`` and ``gt2`` are DecisionTreeResult objects assembled from
     literal Path entries; each entry pairs a list of (attribute, bool)
     conditions with an integer label. (Presumably "gt" stands for
     ground truth -- confirm against the code that uses these.)
     """
     def build_result(specs):
         # One Path per (conditions, label) pair, in the order given.
         return DecisionTreeResult(
             [Path(conditions, label) for conditions, label in specs])

     self.instances1 = Instance.read('./data/test1.dat')
     self.instances2 = Instance.read('./data/test2.dat')

     self.gt1 = build_result([
         ([('a', True), ('b', True), ('c', True)], 0),
         ([('a', True), ('b', True), ('c', False)], 1),
         ([('a', True), ('b', False)], 0),
         ([('a', False), ('c', True)], 1),
         ([('a', False), ('c', False), ('q', True)], 1),
         ([('a', False), ('c', False), ('q', False), ('e', True)], 1),
         ([('a', False), ('c', False), ('q', False), ('e', False)], 0),
     ])

     # Same as gt1 except that the 'q' test is absent from the
     # a=False, c=False branch.
     self.gt2 = build_result([
         ([('a', True), ('b', True), ('c', True)], 0),
         ([('a', True), ('b', True), ('c', False)], 1),
         ([('a', True), ('b', False)], 0),
         ([('a', False), ('c', True)], 1),
         ([('a', False), ('c', False), ('e', True)], 1),
         ([('a', False), ('c', False), ('e', False)], 0),
     ])
Exemplo n.º 5
0
    # Command-line driver fragment: parse options, read the instance file,
    # pick a split-criterion algorithm and train a decision tree on the data.
    # NOTE(review): the enclosing statement (this code is indented) is not
    # visible here, and DecisionTree is used below without a visible import --
    # it must already be in scope from elsewhere in the file.
    from data import Instance
    from algo import Id3, Gini
    from fmeasure import Fmeasure
    import time
    import optparse
    import os  # not used in the lines visible here
    parser = optparse.OptionParser(usage="usage: %prog [options] filepath")
    # -a selects the algorithm: 'id3' (default), 'gini' or 'f'.
    parser.add_option("-a", type="choice", choices=['id3', 'gini', 'f'],
        dest="algo", help="algorithm for decision tree", default="id3")
    (options, args) = parser.parse_args()
    # The data file path is a required positional argument.
    if len(args) == 0:
        parser.error("needs filepath")

    # NOTE(review): time.clock() exists in Python 2 (which this print-statement
    # code targets) but was removed in Python 3.8; start_time is not read in
    # the lines visible here.
    start_time = time.clock()
    filepath = args[0]
    instances = Instance.read(filepath)
    # Report how many instances have label == 1 and how many do not.
    print '%s used (#pos: %s, #neg: %s)' % (filepath, 
        len([d for d in instances if d.label == 1]),
        len([d for d in instances if d.label != 1]))

    # Instantiate the algorithm chosen with -a; the optparse "choice" type
    # restricts options.algo to exactly these three values.
    if options.algo == 'id3':
        algo = Id3()
    elif options.algo == 'gini':
        algo = Gini()
    elif options.algo == 'f':
        # Fmeasure is given the number of instances whose label is 1.
        relnum = len([d for d in instances if d.label == 1])
        algo = Fmeasure(relnum)

    # Train the tree on the instances with the selected algorithm.
    dt = DecisionTree(instances, algo)
    dpath = dt.train()
    print "Paths generated:"