Exemplo n.º 1
0
 def build(self):
     """Build the binary RST tree and store it on ``self.tree``.

     Reads the full contents of ``self.fname`` and passes the text
     through ``buildtree``, ``binarizetree`` and ``backprop``, in that
     order. ``self.tree`` is reassigned after each stage.
     """
     # Use a context manager so the file handle is closed even if
     # read() raises; the original left the handle open.
     with open(self.fname) as fin:
         text = fin.read()
     self.tree = buildtree(text)
     self.tree = binarizetree(self.tree)
     self.tree = backprop(self.tree)
Exemplo n.º 2
0
 def build(self):
     """Build the binary RST tree and store it on ``self.tree``.

     Reads the full contents of ``self.fname`` and passes the text
     through ``buildtree``, ``binarizetree`` and ``backprop``, in that
     order. ``self.tree`` is reassigned after each stage.
     """
     # Use a context manager so the file handle is closed even if
     # read() raises; the original left the handle open.
     with open(self.fname) as fin:
         text = fin.read()
     self.tree = buildtree(text)
     self.tree = binarizetree(self.tree)
     self.tree = backprop(self.tree)
def accuracyFold(dataset=None,f=None,path="./Data"):
	"""Train on one fold, classify its test fold and print a report.

	Fold files are read from ``<path>/<dataset>/folds/trainFold_<f>.p``
	and ``.../testFold_<f>.p``. The last element of every test
	observation is treated as its true class and compared against the
	value returned by ``mdclassify``.

	:param dataset: dataset directory name under ``path``; must be a
		string, since it is concatenated into the file paths.
	:param f: fold identifier, converted with ``str`` for the file name.
	:param path: base data directory.
	:raises ZeroDivisionError: if the test fold contains no observations.
	"""
	foldPath=path+"/" + dataset + "/folds/"

	# Context managers close each pickle file as soon as it is loaded.
	with open(foldPath + "trainFold_"+str(f)+".p","rb") as trainFile:
		trainFoldData=pickle.load(trainFile)

	# Plain (non-pessimistic) tree: buildtree is what is actually called.
	tree=buildtree(trainFoldData)

	with open(foldPath + "testFold_"+str(f)+".p","rb") as testFile:
		testFoldData=pickle.load(testFile)

	trueMatch=0
	totalCmp=0
	actual=[]
	predicted=[]
	for obs in testFoldData:
		totalCmp+=1
		result=mdclassify(obs,tree)
		actual.append(obs[-1])
		predicted.append(result)
		if obs[-1] == result:
			trueMatch+=1

	# NOTE(review): accuracy is computed but not used by the statements
	# visible here; kept so any code relying on it still finds it.
	accuracy=trueMatch/float(totalCmp)

	createLabels(dataset=dataset)
	with open(path+"/"+dataset+"/labels.p","rb") as labelFile:
		target_names=pickle.load(labelFile)
	print(classification_report(actual, predicted ,target_names=target_names))
	'''
def accuracy10Fold(dataset=None,path="./Data"):
	"""Train and test a decision tree on folds 1..10 and average accuracy.

	For each fold a tree is built with ``buildtree`` from the training
	pickle, every test observation is classified with ``mdclassify``,
	and the prediction is compared with the observation's last element.
	Per-fold and final accuracies are printed.

	:param dataset: only used in the final printed message.
	:param path: not used by this variant.
	:returns: mean of the per-fold accuracies.
	:raises ZeroDivisionError: if a test fold contains no observations.
	"""
	# NOTE(review): fold files are read from this hard-coded iris location;
	# neither ``path`` nor ``dataset`` contributes to it. Confirm intended.
	foldPath="./result/iris/iris"

	accuracyList=[]
	for f in range(1,11):
		# Context managers close each pickle file right after loading.
		with open(foldPath+"train"+str(f)+".p","rb") as trainFile:
			trainFoldData=pickle.load(trainFile)

		# Normal decision tree; the pessimistic variant
		# (buildtree_pessimistic) and the per-fold drawtree call were
		# disabled in the original and are intentionally not run here.
		tree=buildtree(trainFoldData)

		with open(foldPath+"test"+str(f)+".p","rb") as testFile:
			testFoldData=pickle.load(testFile)

		# Count observations whose predicted class equals the true class.
		trueMatch=0
		totalCmp=0
		for obs in testFoldData:
			totalCmp+=1
			if obs[-1] == mdclassify(obs,tree):
				trueMatch+=1

		accuracy=trueMatch/float(totalCmp)
		print("Accuracy for Testing fold",f ,accuracy)
		accuracyList.append(accuracy)

	# len(accuracyList) is always 10 here; avoids a second magic number.
	finalAccuracy=sum(accuracyList)/float(len(accuracyList))
	print("Final Accuracy of " ,dataset,finalAccuracy)
	return finalAccuracy
Exemplo n.º 5
0
    def build(self, strTree, vocab):
        """ Construct the tree from its string form and keep its root.

        The work is delegated to ``buildtree`` (refer to buildtree.py
        for more detail); the value it returns is stored on
        ``self.root``.

        :type strTree: string
        :param strTree: string of the tree information

        :type vocab: Vocab instance
        :param vocab: word vocab with representation
        """
        root = buildtree(strTree, vocab)
        self.root = root
def accuracy10Fold(dataset=None,path="./Data"):
	"""Train and test a decision tree on folds 1..10 and average accuracy.

	Fold files are read from ``<path>/<dataset>/folds/trainFold_<f>.p``
	and ``.../testFold_<f>.p``. For each fold a tree is built with
	``buildtree``, every test observation is classified with
	``mdclassify``, and the prediction is compared with the
	observation's last element. Per-fold and final accuracies are
	printed.

	:param dataset: dataset directory name under ``path``; must be a
		string, since it is concatenated into the file paths.
	:param path: base data directory.
	:returns: mean of the per-fold accuracies.
	:raises ZeroDivisionError: if a test fold contains no observations.
	"""
	# The fold directory does not depend on the fold number.
	foldPath=path+"/" + dataset + "/folds/"

	accuracyList=[]
	for f in range(1,11):
		# Context managers close each pickle file right after loading.
		with open(foldPath + "trainFold_"+str(f)+".p","rb") as trainFile:
			trainFoldData=pickle.load(trainFile)

		# Unpruned decision tree (the prune step was disabled upstream).
		tree=buildtree(trainFoldData)

		with open(foldPath + "testFold_"+str(f)+".p","rb") as testFile:
			testFoldData=pickle.load(testFile)

		# Count observations whose predicted class equals the true class.
		trueMatch=0
		totalCmp=0
		for obs in testFoldData:
			totalCmp+=1
			if obs[-1] == mdclassify(obs,tree):
				trueMatch+=1

		accuracy=trueMatch/float(totalCmp)
		print("Accuracy for Testing fold",f ,accuracy)
		accuracyList.append(accuracy)

	# len(accuracyList) is always 10 here; avoids a second magic number.
	finalAccuracy=sum(accuracyList)/float(len(accuracyList))
	print("Final Accuracy of " ,dataset,finalAccuracy)
	return finalAccuracy
Exemplo n.º 7
0
 def build(self):
     """Build the binary RST tree and store it on ``self.tree``.

     Steps, in order:
       1. read ``self.fdis`` and build the annotated tree (``buildtree``);
       2. binarize it (``binarizetree``);
       3. read ``self.fmerge`` with ``DocReader`` into ``self.doc``;
       4. propagate the doc information onto the tree (``backprop``).

     :raises IOError: if ``self.fmerge`` is not an existing file.
     """
     # Use a context manager so the file handle is closed even if
     # read() raises; the original left the handle open.
     with open(self.fdis) as fin:
         text = fin.read()
     # Build RST as annotation
     self.tree = buildtree(text)
     # Binarize it
     self.tree = binarizetree(self.tree)
     # Read doc file (checked only after the tree has been binarized,
     # matching the original statement order)
     if not isfile(self.fmerge):
         raise IOError("File doesn't exist: {}".format(self.fmerge))
     dr = DocReader()
     self.doc = dr.read(self.fmerge)
     # Prop information from doc on the binarized RST tree
     self.tree = backprop(self.tree, self.doc)
Exemplo n.º 8
0
Arquivo: tree.py Projeto: OlafLee/DPLP
 def build(self):
     """Build the binary RST tree and store it on ``self.tree``.

     Steps, in order:
       1. read ``self.fdis`` and build the annotated tree (``buildtree``);
       2. binarize it (``binarizetree``);
       3. read ``self.fmerge`` with ``DocReader`` into ``self.doc``;
       4. propagate the doc information onto the tree (``backprop``).

     :raises IOError: if ``self.fmerge`` is not an existing file.
     """
     # Use a context manager so the file handle is closed even if
     # read() raises; the original left the handle open.
     with open(self.fdis) as fin:
         text = fin.read()
     # Build RST as annotation
     self.tree = buildtree(text)
     # Binarize it
     self.tree = binarizetree(self.tree)
     # Read doc file (checked only after the tree has been binarized,
     # matching the original statement order)
     if not isfile(self.fmerge):
         raise IOError("File doesn't exist: {}".format(self.fmerge))
     dr = DocReader()
     self.doc = dr.read(self.fmerge)
     # Prop information from doc on the binarized RST tree
     self.tree = backprop(self.tree, self.doc)
def printtree(dataset=None,path="./Data",filename="data.p"):
    """Build several trees for a dataset and render each one to a JPEG.

    The pickle at ``<path>/<dataset>/<filename>`` is loaded; its first
    entry is passed to ``drawtree`` as the column names and the
    remaining entries are passed to the tree builders as rows.

    Images written under ``<path>/<dataset>/``:
      * ``treeview.jpg``          - tree from ``buildtree``; overwritten
        at the end with ``prune(tree, 0.3)``
      * ``treeview_prun_mdl.jpg`` - result of ``prune(tree)``
      * ``treeview_pes.jpg``      - tree from ``buildtree_pessimistic``

    :param dataset: dataset directory name under ``path``; must be a
        string, since it is concatenated into the file paths.
    :param path: base data directory.
    :param filename: name of the pickled data file.
    """
    basepath = path + "/" + dataset + "/"

    # Context manager closes the pickle file right after loading.
    with open(basepath + filename, "rb") as datafile:
        data = pickle.load(datafile)

    colnames = data[0]

    writepath = basepath + "treeview.jpg"
    writepath_pes = basepath + "treeview_pes.jpg"
    writepath_prun = basepath + "treeview_prun_mdl.jpg"

    tree = buildtree(rows=data[1:])
    drawtree(tree=tree, jpeg=writepath, colname=colnames)

    tree_prun = prune(tree)
    drawtree(tree=tree_prun, jpeg=writepath_prun, colname=colnames)

    tree_pes = buildtree_pessimistic(rows=data[1:])
    drawtree(tree=tree_pes, jpeg=writepath_pes, colname=colnames)
    # The original repeated the call above with ``colname=colname``.
    # That name's only assignment was commented out, so the call raised
    # NameError and the final drawtree below was never reached. The
    # duplicate draw has been removed.

    drawtree(tree=prune(tree, 0.3), jpeg=writepath, colname=colnames)
Exemplo n.º 10
0
def accuracy(filename):
    """Return the mean test accuracy, in percent, over folds 1..10.

    For each fold ``i`` the pickles ``./result/<filename>/<filename>train<i>.p``
    and ``...test<i>.p`` are loaded, a tree is built from the training
    data with ``buildtree`` and every test observation is classified
    with ``mdclassify``. A prediction counts as correct when it equals
    the observation's last element.

    :param filename: dataset name used for both directory and file prefix.
    :returns: mean accuracy multiplied by 100 (also printed).
    :raises ZeroDivisionError: if a test fold contains no observations.
    """
    # The path prefix does not depend on the fold number.
    subpath = "./result/" + filename + "/" + filename

    accuracyList = []
    for iteration in range(1, 11):
        # Context managers close each pickle file right after loading.
        with open(subpath + "train" + str(iteration) + ".p", "rb") as train_file:
            train_data = pickle.load(train_file)
        tree = buildtree(train_data)

        with open(subpath + "test" + str(iteration) + ".p", "rb") as test_file:
            test_data = pickle.load(test_file)

        correct = 0
        comparison = 0
        for each in test_data:
            comparison += 1
            if each[-1] == mdclassify(each, tree):
                correct += 1
        accuracyList.append(correct / float(comparison))

    # Named mean_accuracy so the local no longer shadows this function.
    mean_accuracy = sum(accuracyList) / float(10)
    print("Final Accuracy:" + str(mean_accuracy * 100))
    return mean_accuracy * 100
Exemplo n.º 11
0
def main():
    """Run the decision-tree pipeline on the wine dataset.

    Sequence: load the pickled header and data, build a tree, print and
    draw it, generate folds, evaluate accuracy, prune the tree and draw
    the pruned version. Progress markers are printed between steps.

    Earlier runs for the iris, banknote, car and haberman datasets
    followed the same sequence; their commented-out code was removed.
    """
    print("header")
    # pickle.load needs a binary file object: text mode fails on
    # Python 3. The context manager also closes the handle.
    with open("./data/wine/header.p", "rb") as header_file:
        # Loaded but not used further in this function.
        header2 = pickle.load(header_file)
    print("wine")
    with open("./data/wine/data.p", "rb") as data_file:
        data2 = pickle.load(data_file)
    print("header3")
    tree2 = buildtree(data2)
    print("header44")
    printtree(tree2)
    print("header45")
    drawtree(tree=tree2,jpeg="./result/wine/tree-wine.jpg",colname=data2[0])
    print("header5")
    genfolds("./result/","wine","./result/")
    # The return value is not needed here; accuracy() prints the result.
    accuracy("wine")
    tree2 = prune(tree2)
    drawtree(tree=tree2,jpeg="./result/wine/wine.jpg",colname=data2[0])
    print("header")