def test_accuracy(rootNode):
    """Print the decision tree's accuracy on the test data set.

    The test rows are loaded with ``parse_training_data`` from the fixed
    ``data/`` CSV paths. Each row is checked against the tree with
    ``IsCompliant``; rows for which it returns a falsy value count as
    wrong, all others as correct. The percentage of correct rows is
    printed; nothing is returned.

    Args:
        rootNode: Root of the decision tree; handed unchanged to
            ``IsCompliant`` together with each test row.

    NOTE(review): this file appears to define ``test_accuracy`` a second
    time further down; the later definition is the one callers get.
    """
    print("Testing Accuracy of the Algorithm")
    testCptTable = parse_training_data(
        'data/featnames.csv', 'data/testfeat.csv', 'data/testlabs.csv')

    num_corrects = 0
    num_wrongs = 0
    # Score every test row against the tree.
    for row in testCptTable:
        if not IsCompliant(row, rootNode):
            num_wrongs += 1
        else:
            num_corrects += 1

    total_count = num_corrects + num_wrongs
    if total_count == 0:
        # An empty test set would otherwise raise ZeroDivisionError below.
        print(" No test rows found, accuracy cannot be computed")
        return

    # float() forces true division so the ratio is not truncated to 0 or 1
    # on interpreters where int / int is integer division.
    accuracy = (float(num_corrects) / total_count) * 100
    print(" Accuracy Percentage =" + str(accuracy))
def test_accuracy(rootNode):
    """Evaluate the decision tree on the test CSVs and print the accuracy.

    Rows come from ``parse_training_data`` called with the feature-name,
    test-feature and test-label files under ``data/``. A row counts as
    correct when ``IsCompliant(row, rootNode)`` is truthy and as wrong
    otherwise. The result is printed as a percentage; the function
    returns ``None``.

    Args:
        rootNode: Root node of the decision tree, forwarded as-is to
            ``IsCompliant`` for each row.

    NOTE(review): an earlier definition with the same name appears to
    precede this one in the file; being later, this one takes effect.
    """
    print("Testing Accuracy of the Algorithm")
    testCptTable = parse_training_data(
        'data/featnames.csv', 'data/testfeat.csv', 'data/testlabs.csv')

    num_corrects = 0
    num_wrongs = 0
    for row in testCptTable:
        # Tally each row as a hit or a miss.
        if IsCompliant(row, rootNode):
            num_corrects += 1
        else:
            num_wrongs += 1

    total_count = num_corrects + num_wrongs
    if not total_count:
        # Guard: with no rows the division below would raise
        # ZeroDivisionError.
        print(" No test rows found, accuracy cannot be computed")
        return

    # Convert to float first so the quotient keeps its fractional part even
    # where "/" on two ints performs integer division.
    accuracy = (float(num_corrects) / total_count) * 100
    print(" Accuracy Percentage =" + str(accuracy))
def main():
    """Interactive entry point: build the ID3 tree and report on it.

    Steps, in order:
      1. Ask whether to use chi-square and, if so, for a p value
         (0.01, 0.05 or 1). Any other answer prints a message and
         returns early.
      2. Parse the training CSVs under ``data/`` with
         ``parse_training_data``.
      3. Build the tree with ``build_id3`` from every attribute of the
         first row except ``'nextPage'``.
      4. Call ``test_accuracy`` and ``calculatespace`` on the root node
         and print ``total_size``.

    Module globals written here: ``p_value`` (as a string, only when
    chi-square is chosen), ``total_size`` (reset to 0) and
    ``total_num_rows`` (number of parsed training rows).

    NOTE(review): ``total_size`` is printed after ``calculatespace`` runs,
    so that function presumably updates it - confirm against its
    definition.
    """
    global p_value, total_size, total_num_rows

    # Accepted p values mapped to the text form stored in ``p_value``.
    valid_p_values = {0.01: "0.01", 0.05: "0.05", 1.0: "1"}

    print(" Please choose if you want to use chi-square:")
    print(" 1. Do not use Chi-Square")
    print(" 2. Use Chi-Square")
    # Normalise to stripped text: input() returns a string on Python 3, so
    # comparing its result with the integers 1 and 2 never matched. str()
    # also copes with an interpreter whose input() evaluates what was typed.
    userInput = str(input(" Enter your choice:")).strip()

    useChiSq = False
    if userInput == "1":
        useChiSq = False
    elif userInput == "2":
        userPvalue = str(
            input(" Enter p value, Valid values (0.01|0.05|1):")).strip()
        try:
            p_key = float(userPvalue)
        except ValueError:
            p_key = None  # not a number at all -> rejected below
        if p_key not in valid_p_values:
            print(" Please enter a valid P value, exiting")
            return
        p_value = valid_p_values[p_key]
        useChiSq = True
    else:
        print(" Please make a valid choice, exiting ")
        return

    total_size = 0
    # NOTE(review): when option 1 is chosen p_value is not assigned in this
    # function, so this line relies on a module-level p_value existing -
    # confirm it is initialised elsewhere.
    print(" Given p value " + str(p_value))

    # CPT table is a list of rows with attributes and values,
    # approximately of 40k size
    print(" Parsing Training Data")
    cptTable = parse_training_data(
        'data/featnames.csv', 'data/trainfeat.csv', 'data/trainlabs.csv')
    total_num_rows = len(cptTable)
    if not cptTable:
        # cptTable[0] below would raise IndexError on an empty table.
        print(" No training rows found, exiting")
        return

    # list(...) is required because dict.keys() may be a view object, which
    # has no remove() method.
    attributes = list(cptTable[0].keys())
    attributes.remove('nextPage')

    print(" Building decision tree")
    root_node = build_id3(cptTable, attributes, useChiSq)
    test_accuracy(root_node)
    calculatespace(root_node)
    print("Total Size (Nodes in decision tree)=" + str(total_size))
def main():
    """Prompt for options, train the ID3 decision tree and print results.

    The user first picks whether chi-square is used (choice ``1`` or
    ``2``). For choice ``2`` a p value of 0.01, 0.05 or 1 must follow.
    Invalid answers print a message and end the function early.
    Afterwards the training data under ``data/`` is parsed with
    ``parse_training_data``, a tree is built by ``build_id3`` over all
    attributes of the first row except ``'nextPage'``, and
    ``test_accuracy`` and ``calculatespace`` are run on the root node
    before ``total_size`` is printed.

    Globals assigned: ``p_value`` (string; only for choice ``2``),
    ``total_size`` (set to 0 before training) and ``total_num_rows``
    (``len`` of the parsed training table).

    NOTE(review): ``calculatespace`` presumably fills in ``total_size``,
    since the value is printed right after the call - verify.
    """
    global p_value, total_size, total_num_rows

    # Numeric p value -> canonical text kept in the ``p_value`` global.
    accepted_p = {0.01: "0.01", 0.05: "0.05", 1.0: "1"}

    print(" Please choose if you want to use chi-square:")
    print(" 1. Do not use Chi-Square")
    print(" 2. Use Chi-Square")
    # input() hands back text on Python 3, so the former "== 1" / "== 2"
    # integer comparisons always failed. Work on stripped text instead;
    # str() keeps this valid even if input() yields an evaluated number.
    userInput = str(input(" Enter your choice:")).strip()

    if userInput not in ("1", "2"):
        print(" Please make a valid choice, exiting ")
        return

    useChiSq = userInput == "2"
    if useChiSq:
        userPvalue = str(
            input(" Enter p value, Valid values (0.01|0.05|1):")).strip()
        try:
            parsed_p = float(userPvalue)
        except ValueError:
            parsed_p = None  # non-numeric text falls through to rejection
        if parsed_p not in accepted_p:
            print(" Please enter a valid P value, exiting")
            return
        p_value = accepted_p[parsed_p]

    total_size = 0
    # NOTE(review): for choice 1 nothing above assigns p_value, so this
    # print depends on a module-level p_value defined elsewhere - confirm.
    print(" Given p value " + str(p_value))

    # CPT table is a list of rows with attributes and values,
    # approximately of 40k size
    print(" Parsing Training Data")
    cptTable = parse_training_data(
        'data/featnames.csv', 'data/trainfeat.csv', 'data/trainlabs.csv')
    total_num_rows = len(cptTable)
    if total_num_rows == 0:
        # Avoid IndexError from cptTable[0] when nothing was parsed.
        print(" No training rows found, exiting")
        return

    # Copy the keys into a real list first: a dict keys view does not
    # support remove().
    attributes = list(cptTable[0].keys())
    attributes.remove('nextPage')

    print(" Building decision tree")
    root_node = build_id3(cptTable, attributes, useChiSq)
    test_accuracy(root_node)
    calculatespace(root_node)
    print("Total Size (Nodes in decision tree)=" + str(total_size))